├── data
│   ├── __init__.pyc
│   ├── cityscapes_loader.pyc
│   ├── __init__.py
│   ├── city_utils.py
│   ├── augmentations.py
│   ├── voc_dataset.py
│   ├── gta5_loader.py
│   └── cityscapes_loader.py
├── splits
│   ├── city
│   │   ├── split_0.pkl
│   │   ├── split_1.pkl
│   │   └── split_2.pkl
│   └── voc
│       ├── split_0.pkl
│       ├── split_1.pkl
│       └── split_2.pkl
├── .gitignore
├── requirements.txt
├── Dockerfile
├── utils
│   ├── sync_batchnorm
│   │   ├── __init__.py
│   │   ├── unittest.py
│   │   ├── batchnorm_reimpl.py
│   │   ├── replicate.py
│   │   ├── comm.py
│   │   └── batchnorm.py
│   ├── transformmasks.py
│   ├── translate_labels.py
│   ├── helpers.py
│   ├── color_pascal.py
│   ├── feature_memory.py
│   ├── metric.py
│   ├── class_balancing.py
│   ├── loss.py
│   ├── palette.py
│   ├── color_city.py
│   └── transformsgpu.py
├── configs
│   ├── configSSL_city_1_30_split0_v3.json
│   ├── configSSL_city_1_30_split1_v3.json
│   ├── configSSL_city_1_30_split2_v3.json
│   ├── configSSL_city_1_4_split0_v3.json
│   ├── configSSL_city_1_4_split1_v3.json
│   ├── configSSL_city_1_4_split2_v3.json
│   ├── configSSL_city_1_8_split0_v3.json
│   ├── configSSL_city_1_8_split1_v3.json
│   ├── configSSL_city_1_8_split2_v3.json
│   ├── configSSL_city_1_30_split1_COCO.json
│   ├── configSSL_city_1_30_split2_COCO.json
│   ├── configSSL_pascal_1_20_split0_COCO.json
│   ├── configSSL_pascal_1_20_split1_COCO.json
│   ├── configSSL_pascal_1_20_split2_COCO.json
│   ├── configSSL_pascal_1_50_split0_COCO.json
│   ├── configSSL_pascal_1_8_split1_COCO.json
│   ├── configSSL_city_1_30_split0_COCO.json
│   ├── configSSL_city_1_30_split1_imagenet.json
│   ├── configSSL_city_1_30_split2_imagenet.json
│   ├── configSSL_city_1_4_split1_COCO.json
│   ├── configSSL_city_1_8_split0_COCO.json
│   ├── configSSL_city_1_8_split1_COCO.json
│   ├── configSSL_city_1_8_split2_COCO.json
│   ├── configSSL_pascal_1_20_split0_imagenet.json
│   ├── configSSL_pascal_1_20_split0_v3.json
│   ├── configSSL_pascal_1_20_split1_imagenet.json
│   ├── configSSL_pascal_1_20_split1_v3.json
│   ├── configSSL_pascal_1_20_split2_imagenet.json
│   ├── configSSL_pascal_1_20_split2_v3.json
│   ├── configSSL_pascal_1_50_split0_imagenet.json
│   ├── configSSL_pascal_1_50_split0_v3.json
│   ├── configSSL_pascal_1_50_split1_COCO.json
│   ├── configSSL_pascal_1_50_split1_v3.json
│   ├── configSSL_pascal_1_50_split2_COCO.json
│   ├── configSSL_pascal_1_50_split2_v3.json
│   ├── configSSL_pascal_1_8_split0_COCO.json
│   ├── configSSL_pascal_1_8_split0_v3.json
│   ├── configSSL_pascal_1_8_split1_imagenet.json
│   ├── configSSL_pascal_1_8_split1_v3.json
│   ├── configSSL_pascal_1_8_split2_COCO.json
│   ├── configSSL_pascal_1_8_split2_v3.json
│   ├── configSSL_city_1_30_split0_imagenet.json
│   ├── configSSL_city_1_4_split0_COCO.json
│   ├── configSSL_city_1_4_split1_imagenet.json
│   ├── configSSL_city_1_4_split2_COCO.json
│   ├── configSSL_city_1_8_split0_imagenet.json
│   ├── configSSL_city_1_8_split1_imagenet.json
│   ├── configSSL_city_1_8_split2_imagenet.json
│   ├── configSSL_pascal_1_50_split2_COCO_reduced.json
│   ├── configSSL_pascal_1_8_split2_imagenet.json
│   ├── configSSL_city_1_4_split0_imagenet.json
│   ├── configSSL_city_1_4_split2_imagenet.json
│   ├── configSSL_pascal_1_50_split1_imagenet.json
│   ├── configSSL_pascal_1_50_split2_imagenet.json
│   └── configSSL_pascal_1_8_split0_imagenet.json
├── contrastive_losses.py
├── evaluateSSL.py
├── README.md
├── model
│   ├── deeplabv2_imagenet.py
│   ├── deeplabv2.py
│   └── deeplabv3.py
└── LICENSE
/data/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shathe/SemiSeg-Contrastive/HEAD/data/__init__.pyc
--------------------------------------------------------------------------------
/splits/city/split_0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shathe/SemiSeg-Contrastive/HEAD/splits/city/split_0.pkl
--------------------------------------------------------------------------------
/splits/city/split_1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shathe/SemiSeg-Contrastive/HEAD/splits/city/split_1.pkl
--------------------------------------------------------------------------------
/splits/city/split_2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shathe/SemiSeg-Contrastive/HEAD/splits/city/split_2.pkl
--------------------------------------------------------------------------------
/splits/voc/split_0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shathe/SemiSeg-Contrastive/HEAD/splits/voc/split_0.pkl
--------------------------------------------------------------------------------
/splits/voc/split_1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shathe/SemiSeg-Contrastive/HEAD/splits/voc/split_1.pkl
--------------------------------------------------------------------------------
/splits/voc/split_2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shathe/SemiSeg-Contrastive/HEAD/splits/voc/split_2.pkl
--------------------------------------------------------------------------------
/data/cityscapes_loader.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shathe/SemiSeg-Contrastive/HEAD/data/cityscapes_loader.pyc
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data/Cityscapes
2 | data/VOC2012
3 | .idea
4 | */__pycache__/*
5 | __pycache__/*
6 | */*/__pycache__/*
7 | */*/*/__pycache__/*
8 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.6.0+cu101
2 | torchvision==0.7.0+cu101
3 | kornia==0.4.1
4 | sklearn
5 | tqdm==4.56.0
6 | scikit-build==0.11.1
7 | cmake==3.18.4.post1
8 | opencv-python==4.2.0.34
9 | opencv-contrib-python==4.2.0.34
10 | scipy==1.2
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-devel
2 | RUN apt-get update
3 | RUN apt-get -y install python3-pip
4 | RUN pip3 install --upgrade pip
5 | RUN apt-get install -y libsm6 libxext6 libxrender-dev libgl1-mesa-glx ffmpeg
6 | COPY requirements.txt /tmp/
7 | RUN pip3 install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
8 | RUN pip3 install -r /tmp/requirements.txt
9 |
10 |
--------------------------------------------------------------------------------
/utils/sync_batchnorm/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : __init__.py
3 | # Author : Jiayuan Mao
4 | # Email : maojiayuan@gmail.com
5 | # Date : 27/01/2018
6 | #
7 | # This file is part of
Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d 12 | from .batchnorm import patch_sync_batchnorm, convert_model 13 | from .replicate import DataParallelWithCallback, patch_replication_callback 14 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from data.cityscapes_loader import cityscapesLoader 4 | from data.voc_dataset import VOCDataSet 5 | 6 | def get_loader(name): 7 | """get_loader 8 | :param name: 9 | """ 10 | return { 11 | "cityscapes": cityscapesLoader, 12 | "pascal_voc": VOCDataSet 13 | }[name] 14 | 15 | def get_data_path(name): 16 | """get_data_path 17 | :param name: 18 | :param config_file: 19 | """ 20 | if name == 'cityscapes': 21 | return '../data/CityScapes/' 22 | 23 | if name == 'gta5': 24 | return '../data/GTA5/' 25 | if name == 'pascal_voc': 26 | return '../data/VOC2012/' 27 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_30_split0_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 7, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 4e-4, 13 | "lr_schedule_power": 0.9, 14 | "weight_decay": 5e-4, 15 | "pretraining": "imagenet", 16 | 17 | "data": { 18 | "split_id_list": 0, 19 | "labeled_samples": 100, 20 | "input_size": "512,512" 21 | } 22 | 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_30_split1_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 7, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 4e-4, 13 | "lr_schedule_power": 0.9, 14 | "weight_decay": 5e-4, 15 | "pretraining": "imagenet", 16 | 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 100, 20 | "input_size": "512,512" 21 | } 22 | 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_30_split2_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 7, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 4e-4, 13 | "lr_schedule_power": 0.9, 14 | "weight_decay": 5e-4, 15 | "pretraining": "imagenet", 16 | 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 
100, 20 | "input_size": "512,512" 21 | } 22 | 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_4_split0_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 7, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 4e-4, 13 | "lr_schedule_power": 0.9, 14 | "weight_decay": 5e-4, 15 | "pretraining": "imagenet", 16 | 17 | "data": { 18 | "split_id_list": 0, 19 | "labeled_samples": 744, 20 | "input_size": "512,512" 21 | } 22 | 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_4_split1_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 7, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 4e-4, 13 | "lr_schedule_power": 0.9, 14 | "weight_decay": 5e-4, 15 | "pretraining": "imagenet", 16 | 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 744, 20 | "input_size": "512,512" 21 | } 22 | 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_4_split2_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 7, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 4e-4, 13 | "lr_schedule_power": 0.9, 14 | "weight_decay": 5e-4, 15 | "pretraining": "imagenet", 16 | 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 744, 20 | "input_size": "512,512" 21 | } 22 | 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_8_split0_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 7, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 4e-4, 13 | "lr_schedule_power": 0.9, 14 | "weight_decay": 5e-4, 15 | "pretraining": "imagenet", 16 | 17 | "data": { 18 | "split_id_list": 0, 19 | 
"labeled_samples": 372, 20 | "input_size": "512,512" 21 | } 22 | 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_8_split1_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 7, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 4e-4, 13 | "lr_schedule_power": 0.9, 14 | "weight_decay": 5e-4, 15 | "pretraining": "imagenet", 16 | 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 372, 20 | "input_size": "512,512" 21 | } 22 | 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_8_split2_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 7, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 4e-4, 13 | "lr_schedule_power": 0.9, 14 | "weight_decay": 5e-4, 15 | "pretraining": "imagenet", 16 | 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 372, 20 | "input_size": "512,512" 21 | } 22 | 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_30_split1_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size":5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 100, 20 | "input_size": "512,512" 21 | } 22 | }, 23 | "seed": 5555, 24 | "ignore_label": 250, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_30_split2_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 
17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 100, 20 | "input_size": "512,512" 21 | } 22 | }, 23 | "seed": 5555, 24 | "ignore_label": 250, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_20_split0_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "pretraining": "COCO", 14 | "lr_schedule": "Poly", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 0, 19 | "labeled_samples": 530, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_20_split1_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 530, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_20_split2_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "COCO", 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 530, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_50_split0_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | 
"lr_schedule_power": 0.9, 15 | "pretraining": "COCO", 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 0, 19 | "labeled_samples": 212, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_8_split1_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 1323, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_30_split0_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 0, 20 | "labeled_samples": 100, 21 | "input_size": "512,512" 22 | } 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_30_split1_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "imagenet", 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 100, 20 | "input_size": "512,512" 21 | } 22 | }, 23 | "seed": 5555, 24 | "ignore_label": 250, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_30_split2_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | 
"num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "imagenet", 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 100, 20 | "input_size": "512,512" 21 | } 22 | }, 23 | "seed": 5555, 24 | "ignore_label": 250, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_4_split1_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 1, 20 | "labeled_samples": 744, 21 | "input_size": "512,512" 22 | } 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_8_split0_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "COCO", 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 0, 20 | "labeled_samples": 372, 21 | "input_size": "512,512" 22 | } 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_8_split1_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 1, 20 | "labeled_samples": 372, 21 | "input_size": "512,512" 22 | } 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_8_split2_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | 
"batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 2, 20 | "labeled_samples": 372, 21 | "input_size": "512,512" 22 | } 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_20_split0_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "pretraining": "imagenet", 14 | "lr_schedule": "Poly", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 0, 19 | "labeled_samples": 530, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_20_split0_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 20, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | "data": { 18 | "split_id_list": 0, 19 | "labeled_samples": 530, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_20_split1_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "pretraining": "imagenet", 14 | "lr_schedule": "Poly", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 530, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_20_split1_v3.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 20, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 530, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_20_split2_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "imagenet", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 530, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_20_split2_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 20, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 530, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_50_split0_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "imagenet", 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 0, 19 | "labeled_samples": 212, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- 
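Note on the config files: every configSSL_*.json in this dump follows the same schema, with "model"/"version"/"dataset" at the top level, a "training" block holding the optimizer and schedule settings, a nested "data" block with "split_id_list", "labeled_samples" and "input_size", plus "seed", "ignore_label" and a "utils" block. The following is a minimal standalone sketch, using only Python's standard json module, of how one of these files can be parsed; the helper name load_ssl_config and the example path are illustrative, not code from this repository.

import json

def load_ssl_config(path):
    """Parse one configSSL_*.json file and return its commonly used fields."""
    with open(path) as f:
        config = json.load(f)

    training = config["training"]
    data = training["data"]
    # "input_size" is stored as an "H,W" string such as "512,512" or "321,321".
    height, width = (int(v) for v in data["input_size"].split(","))

    return {
        "model": config["model"],                # "DeepLab"
        "version": config["version"],            # "2" or "3"
        "dataset": config["dataset"],            # "cityscapes" or "pascal_voc"
        "pretraining": training["pretraining"],  # "imagenet" or "COCO"
        "labeled_samples": data["labeled_samples"],
        "split_id": data["split_id_list"],
        "input_size": (height, width),
        "ignore_label": config["ignore_label"],
    }

# Example (illustrative path):
# cfg = load_ssl_config("configs/configSSL_city_1_30_split0_v3.json")
# print(cfg["dataset"], cfg["labeled_samples"], cfg["input_size"])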
/configs/configSSL_pascal_1_50_split0_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 20, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | "data": { 18 | "split_id_list": 0, 19 | "labeled_samples": 212, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_50_split1_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 212, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_50_split1_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 20, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 212, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_50_split2_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "COCO", 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 212, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 
32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_50_split2_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 20, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 212, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_8_split0_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "COCO", 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 0, 20 | "labeled_samples": 1323, 21 | "input_size": "321,321" 22 | } 23 | }, 24 | "seed": 7, 25 | "ignore_label": 255, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_8_split0_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 20, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | "data": { 18 | "split_id_list": 0, 19 | "labeled_samples": 1323, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_8_split1_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "imagenet", 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 1323, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 
28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_8_split1_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 20, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 1323, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_8_split2_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "pretraining": "COCO", 14 | "lr_schedule": "Poly", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 2, 20 | "labeled_samples": 1323, 21 | "input_size": "321,321" 22 | } 23 | }, 24 | "seed": 7, 25 | "ignore_label": 255, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_8_split2_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "3", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 20, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 1323, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_30_split0_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | 18 | "data": { 19 | "split_id_list": 0, 20 | "labeled_samples": 100, 21 | "input_size": "512,512" 22 | } 23 | 
}, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_4_split0_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 0, 20 | "labeled_samples": 744, 21 | "input_size": "512,512" 22 | } 23 | 24 | }, 25 | "seed": 5555, 26 | "ignore_label": 250, 27 | 28 | "utils": { 29 | "save_checkpoint_every": 200000, 30 | "checkpoint_dir": "../saved/DeepLab", 31 | "val_per_iter": 1000, 32 | "save_best_model": true 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_4_split1_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "imagenet", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 1, 20 | "labeled_samples": 744, 21 | "input_size": "512,512" 22 | } 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_4_split2_COCO.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "COCO", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | 19 | "data": { 20 | "split_id_list": 2, 21 | "labeled_samples": 744, 22 | "input_size": "512,512" 23 | } 24 | }, 25 | "seed": 5555, 26 | "ignore_label": 250, 27 | 28 | "utils": { 29 | "save_checkpoint_every": 200000, 30 | "checkpoint_dir": "../saved/DeepLab", 31 | "val_per_iter": 1000, 32 | "save_best_model": true 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_8_split0_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "imagenet", 16 | "weight_decay": 5e-4, 
17 | 18 | "data": { 19 | "split_id_list": 0, 20 | "labeled_samples": 372, 21 | "input_size": "512,512" 22 | } 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_8_split1_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | 18 | "data": { 19 | "split_id_list": 1, 20 | "labeled_samples": 372, 21 | "input_size": "512,512" 22 | } 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_8_split2_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "weight_decay": 5e-4, 16 | "pretraining": "imagenet", 17 | 18 | "data": { 19 | "split_id_list": 2, 20 | "labeled_samples": 372, 21 | "input_size": "512,512" 22 | } 23 | }, 24 | "seed": 5555, 25 | "ignore_label": 250, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_50_split2_COCO_reduced.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 8, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "COCO", 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 212, 20 | "input_size": "256,256" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_8_split2_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations":150000, 12 | "learning_rate": 
2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "imagenet", 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 2, 20 | "labeled_samples": 1323, 21 | "input_size": "321,321" 22 | } 23 | }, 24 | "seed": 7, 25 | "ignore_label": 255, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_4_split0_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "imagenet", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 0, 20 | "labeled_samples": 744, 21 | "input_size": "512,512" 22 | } 23 | 24 | }, 25 | "seed": 5555, 26 | "ignore_label": 250, 27 | 28 | "utils": { 29 | "save_checkpoint_every": 200000, 30 | "checkpoint_dir": "../saved/DeepLab", 31 | "val_per_iter": 1000, 32 | "save_best_model": true 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /configs/configSSL_city_1_4_split2_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "cityscapes", 5 | 6 | "training": { 7 | "batch_size": 5, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "imagenet", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | 18 | 19 | "data": { 20 | "split_id_list": 2, 21 | "labeled_samples": 744, 22 | "input_size": "512,512" 23 | } 24 | }, 25 | "seed": 5555, 26 | "ignore_label": 250, 27 | 28 | "utils": { 29 | "save_checkpoint_every": 200000, 30 | "checkpoint_dir": "../saved/DeepLab", 31 | "val_per_iter": 1000, 32 | "save_best_model": true 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_50_split1_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "pretraining": "imagenet", 15 | "lr_schedule_power": 0.9, 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 1, 19 | "labeled_samples": 212, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_50_split2_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | 
"batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "imagenet", 16 | "weight_decay": 5e-4, 17 | "data": { 18 | "split_id_list": 2, 19 | "labeled_samples": 212, 20 | "input_size": "321,321" 21 | } 22 | }, 23 | "seed": 7, 24 | "ignore_label": 255, 25 | 26 | "utils": { 27 | "save_checkpoint_every": 200000, 28 | "checkpoint_dir": "../saved/DeepLab", 29 | "val_per_iter": 1000, 30 | "save_best_model": true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/configSSL_pascal_1_8_split0_imagenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "DeepLab", 3 | "version": "2", 4 | "dataset": "pascal_voc", 5 | 6 | "training": { 7 | "batch_size": 14, 8 | "num_workers": 3, 9 | "optimizer": "SGD", 10 | "momentum": 0.9, 11 | "num_iterations": 150000, 12 | "learning_rate": 2e-4, 13 | "lr_schedule": "Poly", 14 | "lr_schedule_power": 0.9, 15 | "pretraining": "imagenet", 16 | "weight_decay": 5e-4, 17 | 18 | "data": { 19 | "split_id_list": 0, 20 | "labeled_samples": 1323, 21 | "input_size": "321,321" 22 | } 23 | }, 24 | "seed": 7, 25 | "ignore_label": 255, 26 | 27 | "utils": { 28 | "save_checkpoint_every": 200000, 29 | "checkpoint_dir": "../saved/DeepLab", 30 | "val_per_iter": 1000, 31 | "save_best_model": true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /utils/sync_batchnorm/unittest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : unittest.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 
10 | 11 | import unittest 12 | import torch 13 | 14 | 15 | class TorchTestCase(unittest.TestCase): 16 | def assertTensorClose(self, x, y): 17 | adiff = float((x - y).abs().max()) 18 | if (y == 0).all(): 19 | rdiff = 'NaN' 20 | else: 21 | rdiff = float((adiff / y).abs().max()) 22 | 23 | message = ( 24 | 'Tensor close check failed\n' 25 | 'adiff={}\n' 26 | 'rdiff={}\n' 27 | ).format(adiff, rdiff) 28 | self.assertTrue(torch.allclose(x, y), message) 29 | 30 | -------------------------------------------------------------------------------- /utils/transformmasks.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | ''' 4 | 5 | import torch 6 | 7 | ''' 8 | def generate_cloud_mask(img_size, sigma, p,seed=None): 9 | T=10 10 | np.random.seed(seed) 11 | # Randomly draw sigma from log-uniform distribution 12 | N = np.random.normal(size=img_size) # Generate noise image 13 | Ns = gaussian_filter(N, sigma) # Smooth with a Gaussian 14 | Ns_norm = (Ns-Ns.mean())/Ns.std() 15 | Ns_sharp = np.tanh(T*Ns_norm) 16 | Ns_normalised = (Ns_sharp - np.min(Ns_sharp))/np.ptp(Ns_sharp) 17 | return Ns_normalised''' 18 | 19 | def generate_class_mask(pred, classes): # pred 512,512 classes 9 20 | pred, classes = torch.broadcast_tensors(pred.unsqueeze(0), classes.unsqueeze(1).unsqueeze(2)) 21 | N = pred.eq(classes).sum(0) 22 | return N 23 | ''' 24 | def generate_cow_class_mask(pred, classes, sigma, p,): 25 | N=np.zeros(pred.shape) 26 | pred = np.array(pred.cpu()) 27 | for c in classes: 28 | N[pred==c] = generate_cow_mask(pred.shape,sigma,p)[pred==c] 29 | return N''' 30 | -------------------------------------------------------------------------------- /utils/translate_labels.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import cv2 3 | import numpy as np 4 | import tqdm 5 | 6 | colors = [[128, 64, 128], 7 | [244, 35, 232], 8 | [70, 70, 70], 9 | [102, 102, 156], 10 | [190, 153, 153], 11 | [153, 153, 153], 12 | [250, 170, 30], 13 | [220, 220, 0], 14 | [107, 142, 35], 15 | [152, 251, 152], 16 | [70, 130, 180], 17 | [220, 20, 60], 18 | [255, 0, 0], 19 | [0, 0, 142], 20 | [0, 0, 70], 21 | [0, 60, 100], 22 | [0, 80, 100], 23 | [0, 0, 230], 24 | [119, 11, 32], 25 | ] 26 | 27 | 28 | labels = glob.glob('../data/GTA5/labels/*/*.png') 29 | assert len(labels) > 0, "Labels not found in ../data/GTA5/labels/*/*.png" 30 | 31 | for f in tqdm.tqdm(labels): 32 | image = cv2.imread(f) 33 | results = np.ones_like(image[:,:,0]) * 250 34 | 35 | for i in range(len(colors)): 36 | color_i = colors[i] 37 | class_i_image1 = image[:,:,0]==color_i[2] 38 | class_i_image2 = image[:,:,1]==color_i[1] 39 | class_i_image3 = image[:,:,2]==color_i[0] 40 | 41 | class_i_image = class_i_image1 & class_i_image2 & class_i_image3 42 | 43 | results[class_i_image] = i 44 | 45 | 46 | cv2.imwrite(f, results) 47 | 48 | -------------------------------------------------------------------------------- /utils/helpers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | ''' 4 | 5 | import os 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import PIL 10 | 11 | def dir_exists(path): 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | def initialize_weights(*models): 16 | for model in models: 17 | for m in model.modules(): 18 | if isinstance(m, nn.Conv2d): 19 | 
nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') 20 | elif isinstance(m, nn.BatchNorm2d): 21 | m.weight.data.fill_(1.) 22 | m.bias.data.fill_(1e-4) 23 | elif isinstance(m, nn.Linear): 24 | m.weight.data.normal_(0.0, 0.0001) 25 | m.bias.data.zero_() 26 | 27 | def get_upsampling_weight(in_channels, out_channels, kernel_size): 28 | factor = (kernel_size + 1) // 2 29 | if kernel_size % 2 == 1: 30 | center = factor - 1 31 | else: 32 | center = factor - 0.5 33 | og = np.ogrid[:kernel_size, :kernel_size] 34 | filt = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) 35 | weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size), dtype=np.float64) 36 | weight[list(range(in_channels)), list(range(out_channels)), :, :] = filt 37 | return torch.from_numpy(weight).float() 38 | 39 | def colorize_mask(mask, palette): 40 | zero_pad = 256 * 3 - len(palette) 41 | for i in range(zero_pad): 42 | palette.append(0) 43 | new_mask = PIL.Image.fromarray(mask.astype(np.uint8)).convert('P') 44 | new_mask.putpalette(palette) 45 | return new_mask 46 | 47 | def set_trainable_attr(m,b): 48 | m.trainable = b 49 | for p in m.parameters(): p.requires_grad = b 50 | 51 | def apply_leaf(m, f): 52 | c = m if isinstance(m, (list, tuple)) else list(m.children()) 53 | if isinstance(m, nn.Module): 54 | f(m) 55 | if len(c)>0: 56 | for l in c: 57 | apply_leaf(l,f) 58 | 59 | def set_trainable(l, b): 60 | apply_leaf(l, lambda m: set_trainable_attr(m,b)) 61 | -------------------------------------------------------------------------------- /utils/color_pascal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python implementation of the color map function for the PASCAL VOC data set. 3 | Official Matlab version can be found in the PASCAL VOC devkit 4 | http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#devkit 5 | """ 6 | from __future__ import print_function 7 | import numpy as np 8 | from skimage.io import imshow 9 | import matplotlib.pyplot as plt 10 | import os 11 | import numpy as np 12 | import glob 13 | import cv2 14 | import random 15 | import scipy 16 | import argparse 17 | from PIL import Image 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--input_dir", help="Dataset to train", default='./out_dir/Datasets/cityscapes') 21 | parser.add_argument("--output_dir", help="Dataset to train", default='./out_dir/Datasets/cityscapes_colored') 22 | args = parser.parse_args() 23 | from collections import namedtuple 24 | input_dir = args.input_dir 25 | output_dir = args.output_dir 26 | 27 | 28 | 29 | def color_map(N=256, normalized=False): 30 | def bitget(byteval, idx): 31 | return ((byteval & (1 << idx)) != 0) 32 | 33 | dtype = 'float32' if normalized else 'uint8' 34 | cmap = np.zeros((N, 3), dtype=dtype) 35 | for i in range(N): 36 | r = g = b = 0 37 | c = i 38 | for j in range(8): 39 | r = r | (bitget(c, 0) << 7-j) 40 | g = g | (bitget(c, 1) << 7-j) 41 | b = b | (bitget(c, 2) << 7-j) 42 | c = c >> 3 43 | 44 | cmap[i] = np.array([r, g, b]) 45 | 46 | cmap = cmap/255 if normalized else cmap 47 | return cmap 48 | 49 | 50 | if not os.path.exists(output_dir): 51 | os.makedirs(output_dir) 52 | 53 | labels = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 54 | 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor', 55 | 'void'] 56 | nclasses = 21 57 | row_size = 50 58 | col_size = 500 59 | cmap = color_map() 60 | 61 | 
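# Worked example (illustrative): color_map() reproduces the standard PASCAL VOC palette
# through the bit manipulation above, e.g.
#   cmap = color_map()
#   cmap[0]  -> [  0,   0,   0]   background
#   cmap[1]  -> [128,   0,   0]   aeroplane
#   cmap[2]  -> [  0, 128,   0]   bicycle
#   cmap[15] -> [192, 128, 128]   person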
outputs = glob.glob(input_dir + '/*') 62 | for output in outputs: 63 | name = output.split('/')[-1] 64 | output_name = output_dir + name 65 | print(output_name) 66 | target = np.array(Image.open(output))[:, :, np.newaxis] 67 | cmap = color_map()[:, np.newaxis, :] 68 | new_im = np.dot(target == 0, cmap[0]) 69 | for i in range(1, cmap.shape[0]): 70 | new_im += np.dot(target == i, cmap[i]) 71 | new_im = Image.fromarray(new_im.astype(np.uint8)) 72 | new_im.save(output_name) 73 | 74 | -------------------------------------------------------------------------------- /data/city_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | Misc Utility functions 4 | """ 5 | from collections import OrderedDict 6 | import os 7 | import numpy as np 8 | 9 | 10 | def recursive_glob(rootdir=".", suffix=""): 11 | """Performs recursive glob with given suffix and rootdir 12 | :param rootdir is the root directory 13 | :param suffix is the suffix to be searched 14 | """ 15 | return [ 16 | os.path.join(looproot, filename) 17 | for looproot, _, filenames in os.walk(rootdir) 18 | for filename in filenames 19 | if filename.endswith(suffix) 20 | ] 21 | 22 | 23 | def poly_lr_scheduler( 24 | optimizer, init_lr, iter, lr_decay_iter=1, max_iter=30000, power=0.9 25 | ): 26 | """Polynomial decay of learning rate 27 | :param init_lr is base learning rate 28 | :param iter is a current iteration 29 | :param lr_decay_iter how frequently decay occurs, default is 1 30 | :param max_iter is number of maximum iterations 31 | :param power is a polymomial power 32 | 33 | """ 34 | if iter % lr_decay_iter or iter > max_iter: 35 | return optimizer 36 | 37 | for param_group in optimizer.param_groups: 38 | param_group["lr"] = init_lr * (1 - iter / max_iter) ** power 39 | 40 | 41 | def adjust_learning_rate(optimizer, init_lr, epoch): 42 | """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" 43 | lr = init_lr * (0.1 ** (epoch // 30)) 44 | for param_group in optimizer.param_groups: 45 | param_group["lr"] = lr 46 | 47 | 48 | def alpha_blend(input_image, segmentation_mask, alpha=0.5): 49 | """Alpha Blending utility to overlay RGB masks on RBG images 50 | :param input_image is a np.ndarray with 3 channels 51 | :param segmentation_mask is a np.ndarray with 3 channels 52 | :param alpha is a float value 53 | 54 | """ 55 | blended = np.zeros(input_image.size, dtype=np.float32) 56 | blended = input_image * alpha + segmentation_mask * (1 - alpha) 57 | return blended 58 | 59 | 60 | def convert_state_dict(state_dict): 61 | """Converts a state dict saved from a dataParallel module to normal 62 | module state_dict inplace 63 | :param state_dict is the loaded DataParallel model_state 64 | 65 | """ 66 | new_state_dict = OrderedDict() 67 | for k, v in state_dict.items(): 68 | name = k[7:] # remove `module.` 69 | new_state_dict[name] = v 70 | return new_state_dict 71 | -------------------------------------------------------------------------------- /utils/sync_batchnorm/batchnorm_reimpl.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : batchnorm_reimpl.py 4 | # Author : acgtyrant 5 | # Date : 11/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 
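# Usage sketch (illustrative only): BatchNorm2dReimpl below mirrors nn.BatchNorm2d, so the
# two can be compared numerically once the affine parameters are copied over:
#
#   bn_ref = BatchNorm2dReimpl(64)
#   bn_torch = nn.BatchNorm2d(64, eps=1e-5, momentum=0.1)
#   bn_torch.weight.data.copy_(bn_ref.weight.data)
#   bn_torch.bias.data.copy_(bn_ref.bias.data)
#   x = torch.randn(8, 64, 32, 32)
#   max_abs_diff = (bn_ref(x) - bn_torch(x)).abs().max()  # ~0 in training mode, up to float precision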
10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.init as init 14 | 15 | __all__ = ['BatchNorm2dReimpl'] 16 | 17 | 18 | class BatchNorm2dReimpl(nn.Module): 19 | """ 20 | A re-implementation of batch normalization, used for testing the numerical 21 | stability. 22 | 23 | Author: acgtyrant 24 | See also: 25 | https://github.com/vacancy/Synchronized-BatchNorm-PyTorch/issues/14 26 | """ 27 | def __init__(self, num_features, eps=1e-5, momentum=0.1): 28 | super().__init__() 29 | 30 | self.num_features = num_features 31 | self.eps = eps 32 | self.momentum = momentum 33 | self.weight = nn.Parameter(torch.empty(num_features)) 34 | self.bias = nn.Parameter(torch.empty(num_features)) 35 | self.register_buffer('running_mean', torch.zeros(num_features)) 36 | self.register_buffer('running_var', torch.ones(num_features)) 37 | self.reset_parameters() 38 | 39 | def reset_running_stats(self): 40 | self.running_mean.zero_() 41 | self.running_var.fill_(1) 42 | 43 | def reset_parameters(self): 44 | self.reset_running_stats() 45 | init.uniform_(self.weight) 46 | init.zeros_(self.bias) 47 | 48 | def forward(self, input_): 49 | batchsize, channels, height, width = input_.size() 50 | numel = batchsize * height * width 51 | input_ = input_.permute(1, 0, 2, 3).contiguous().view(channels, numel) 52 | sum_ = input_.sum(1) 53 | sum_of_square = input_.pow(2).sum(1) 54 | mean = sum_ / numel 55 | sumvar = sum_of_square - sum_ * mean 56 | 57 | self.running_mean = ( 58 | (1 - self.momentum) * self.running_mean 59 | + self.momentum * mean.detach() 60 | ) 61 | unbias_var = sumvar / (numel - 1) 62 | self.running_var = ( 63 | (1 - self.momentum) * self.running_var 64 | + self.momentum * unbias_var.detach() 65 | ) 66 | 67 | bias_var = sumvar / numel 68 | inv_std = 1 / (bias_var + self.eps).pow(0.5) 69 | output = ( 70 | (input_ - mean.unsqueeze(1)) * inv_std.unsqueeze(1) * 71 | self.weight.unsqueeze(1) + self.bias.unsqueeze(1)) 72 | 73 | return output.view(channels, batchsize, height, width).permute(1, 0, 2, 3).contiguous() 74 | 75 | -------------------------------------------------------------------------------- /contrastive_losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def contrastive_class_to_class_learned_memory(model, features, class_labels, num_classes, memory): 5 | """ 6 | 7 | Args: 8 | model: segmentation model that contains the self-attention MLPs for selecting the features 9 | to take part in the contrastive learning optimization 10 | features: Nx256 feature vectors for the contrastive learning (after applying the projection and prediction head) 11 | class_labels: N corresponding class labels for every feature vector 12 | num_classes: number of classesin the dataet 13 | memory: memory bank [List] 14 | 15 | Returns: 16 | returns the contrastive loss between features vectors from [features] and from [memory] in a class-wise fashion. 
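        Illustrative shapes: for each class c, the feature vectors of that class
        ([M_c, 256]) are compared against the vectors stored in memory[c] ([N_c, 256]),
        producing an M_c x N_c weighted distance matrix whose mean is accumulated; the
        returned loss is that sum divided by num_classes (a single scalar).

        Minimal usage sketch (the feature-extraction call is a hypothetical placeholder,
        not a function of this repository; feature_memory is a FeatureMemory instance
        from utils/feature_memory.py):

            feats, labels = extract_projected_features(model, images, pseudo_labels)
            loss = contrastive_class_to_class_learned_memory(
                model, feats, labels, num_classes=19, memory=feature_memory.memory)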
17 | """ 18 | 19 | loss = 0 20 | 21 | for c in range(num_classes): 22 | # get features of a specific class 23 | mask_c = class_labels == c 24 | features_c = features[mask_c,:] 25 | memory_c = memory[c] # N, 256 26 | 27 | # get the self-attention MLPs both for memory feature vectors (projected vectors) and network feature vectors (predicted vectors) 28 | selector = model.__getattr__('contrastive_class_selector_' + str(c)) 29 | selector_memory = model.__getattr__('contrastive_class_selector_memory' + str(c)) 30 | 31 | if memory_c is not None and features_c.shape[0] > 1 and memory_c.shape[0] > 1: 32 | 33 | memory_c = torch.from_numpy(memory_c).cuda() 34 | 35 | # L2 normalize vectors 36 | memory_c = F.normalize(memory_c, dim=1) # N, 256 37 | features_c_norm = F.normalize(features_c, dim=1) # M, 256 38 | 39 | # compute similarity. All elements with all elements 40 | similarities = torch.mm(features_c_norm, memory_c.transpose(1, 0)) # MxN 41 | distances = 1 - similarities # values between [0, 2] where 0 means same vectors 42 | # M (elements), N (memory) 43 | 44 | 45 | # now weight every sample 46 | 47 | learned_weights_features = selector(features_c.detach()) # detach for trainability 48 | learned_weights_features_memory = selector_memory(memory_c) 49 | 50 | # self-attention over the memory features axis and over the learned contrastive features axis 51 | learned_weights_features = torch.sigmoid(learned_weights_features) 52 | rescaled_weights = (learned_weights_features.shape[0] / learned_weights_features.sum(dim=0)) * learned_weights_features 53 | rescaled_weights = rescaled_weights.repeat(1, distances.shape[1]) 54 | distances = distances * rescaled_weights 55 | 56 | 57 | learned_weights_features_memory = torch.sigmoid(learned_weights_features_memory) 58 | learned_weights_features_memory = learned_weights_features_memory.permute(1, 0) 59 | rescaled_weights_memory = (learned_weights_features_memory.shape[0] / learned_weights_features_memory.sum(dim=0)) * learned_weights_features_memory 60 | rescaled_weights_memory = rescaled_weights_memory.repeat(distances.shape[0], 1) 61 | distances = distances * rescaled_weights_memory 62 | 63 | 64 | loss = loss + distances.mean() 65 | 66 | return loss / num_classes 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /utils/feature_memory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation for the Memory Bank for pixel-level feature vectors 3 | """ 4 | 5 | import torch 6 | import numpy as np 7 | import random 8 | 9 | class FeatureMemory: 10 | def __init__(self, num_samples, dataset, memory_per_class=2048, feature_size=256, n_classes=19): 11 | self.num_samples = num_samples 12 | self.memory_per_class = memory_per_class 13 | self.feature_size = feature_size 14 | self.memory = [None] * n_classes 15 | self.n_classes = n_classes 16 | if dataset == 'cityscapes': # usually all classes in one image 17 | self.per_class_samples_per_image = max(1, int(round(memory_per_class / num_samples))) 18 | elif dataset == 'pascal_voc': # usually only around 3 classes on each image, except background class 19 | self.per_class_samples_per_image = max(1, int(n_classes / 3 * round(memory_per_class / num_samples))) 20 | 21 | 22 | 23 | def add_features_from_sample_learned(self, model, features, class_labels, batch_size): 24 | """ 25 | Updates the memory bank with high-quality feature vectors per class 26 | Args: 27 | model: segmentation model containing the self-attention modules
(contrastive_class_selectors) 28 | features: BxFxWxH feature maps containing the feature vectors for the contrastive learning (the projection head has already been applied) 29 | class_labels: BxWxH corresponding labels to the [features] 30 | batch_size: batch size 31 | 32 | Returns: 33 | 34 | """ 35 | features = features.detach() 36 | class_labels = class_labels.detach().cpu().numpy() 37 | 38 | elements_per_class = batch_size * self.per_class_samples_per_image 39 | 40 | # for each class, save [elements_per_class] 41 | for c in range(self.n_classes): 42 | mask_c = class_labels == c # get mask for class c 43 | selector = model.__getattr__('contrastive_class_selector_' + str(c)) # get the self-attention module for class c 44 | features_c = features[mask_c, :] # get features from class c 45 | if features_c.shape[0] > 0: 46 | if features_c.shape[0] > elements_per_class: 47 | with torch.no_grad(): 48 | # get ranking scores 49 | rank = selector(features_c) 50 | rank = torch.sigmoid(rank) 51 | # sort them 52 | _, indices = torch.sort(rank[:, 0], dim=0) 53 | indices = indices.cpu().numpy() 54 | features_c = features_c.cpu().numpy() 55 | # get features with highest rankings 56 | features_c = features_c[indices, :] 57 | new_features = features_c[:elements_per_class, :] 58 | else: 59 | new_features = features_c.cpu().numpy() 60 | 61 | if self.memory[c] is None: # was empty, first elements 62 | self.memory[c] = new_features 63 | 64 | else: # add elements to already existing list 65 | # keep only most recent memory_per_class samples 66 | self.memory[c] = np.concatenate((new_features, self.memory[c]), axis = 0)[:self.memory_per_class, :] 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /data/augmentations.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | Slightly modified 4 | ''' 5 | 6 | import random 7 | import numpy as np 8 | from PIL import Image, ImageOps 9 | 10 | 11 | class Compose(object): 12 | def __init__(self, augmentations): 13 | self.augmentations = augmentations 14 | 15 | def __call__(self, img, mask): 16 | img, mask = Image.fromarray(img, mode="RGB"), Image.fromarray(mask, mode="L") 17 | assert img.size == mask.size 18 | for a in self.augmentations: 19 | img, mask = a(img, mask) 20 | return np.array(img), np.array(mask, dtype=np.uint8) 21 | 22 | 23 | 24 | class RandomCrop_city(object): 25 | def __init__(self, size, padding=0): 26 | self.size = tuple(size) 27 | self.padding = padding 28 | 29 | def __call__(self, img, mask): 30 | if self.padding > 0: 31 | img = ImageOps.expand(img, border=self.padding, fill=0) 32 | mask = ImageOps.expand(mask, border=self.padding, fill=0) 33 | 34 | assert img.size == mask.size 35 | w, h = img.size 36 | th, tw = self.size 37 | 38 | # Resize to half size 39 | img = img.resize((int(w/2), int(h/2)), Image.BILINEAR) 40 | mask = mask.resize((int(w/2), int(h/2)), Image.NEAREST) 41 | 42 | # Random crop to input size 43 | x1 = random.randint(0, int(w/2) - tw) 44 | y1 = random.randint(0, int(h/2) - th) 45 | 46 | return ( 47 | img.crop((x1, y1, x1 + tw, y1 + th)), 48 | mask.crop((x1, y1, x1 + tw, y1 + th)), 49 | ) 50 | 51 | 52 | class RandomCrop_city_highres(object): 53 | def __init__(self, size, padding=0): 54 | self.size = tuple(size) 55 | self.padding = padding 56 | 57 | def __call__(self, img, mask): 58 | if self.padding > 0: 59 | img = ImageOps.expand(img, border=self.padding, fill=0) 60 | mask = ImageOps.expand(mask,
border=self.padding, fill=0) 61 | 62 | assert img.size == mask.size 63 | w, h = img.size 64 | th, tw = self.size 65 | 66 | x1 = random.randint(0, int(w) - tw) 67 | y1 = random.randint(0, int(h) - th) 68 | return ( 69 | img.crop((x1, y1, x1 + tw, y1 + th)), 70 | mask.crop((x1, y1, x1 + tw, y1 + th)), 71 | ) 72 | 73 | 74 | class Resize_city(object): 75 | def __init__(self, padding=0): 76 | self.padding = padding 77 | 78 | def __call__(self, img, mask): 79 | if self.padding > 0: 80 | img = ImageOps.expand(img, border=self.padding, fill=0) 81 | mask = ImageOps.expand(mask, border=self.padding, fill=0) 82 | 83 | assert img.size == mask.size 84 | w, h = img.size 85 | 86 | # Resize to half size 87 | img = img.resize((int(w/2), int(h/2)), Image.BILINEAR) 88 | 89 | return img, mask 90 | 91 | 92 | class Resize_city_highres(object): 93 | def __init__(self, padding=0): 94 | self.padding = padding 95 | 96 | def __call__(self, img, mask): 97 | if self.padding > 0: 98 | img = ImageOps.expand(img, border=self.padding, fill=0) 99 | mask = ImageOps.expand(mask, border=self.padding, fill=0) 100 | 101 | 102 | return img, mask -------------------------------------------------------------------------------- /utils/metric.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | Slightly modified 4 | ''' 5 | 6 | import numpy as np 7 | from multiprocessing import Pool 8 | 9 | class ConfusionMatrix(object): 10 | 11 | def __init__(self, nclass, classes=None): 12 | self.nclass = nclass 13 | self.classes = classes 14 | self.M = np.zeros((nclass, nclass)) 15 | 16 | def add(self, gt, pred): 17 | assert (np.max(pred) <= self.nclass) 18 | assert (len(gt) == len(pred)) 19 | for i in range(len(gt)): 20 | if not gt[i] == 255: 21 | self.M[gt[i], pred[i]] += 1.0 22 | 23 | def addM(self, matrix): 24 | assert (matrix.shape == self.M.shape) 25 | self.M += matrix 26 | 27 | def __str__(self): 28 | pass 29 | 30 | def recall(self): 31 | recall = 0.0 32 | for i in range(self.nclass): 33 | recall += self.M[i, i] / np.sum(self.M[:, i]) 34 | 35 | return recall / self.nclass 36 | 37 | def accuracy(self): 38 | accuracy = 0.0 39 | for i in range(self.nclass): 40 | accuracy += self.M[i, i] / np.sum(self.M[i, :]) 41 | 42 | return accuracy / self.nclass 43 | 44 | def jaccard(self): 45 | jaccard_perclass = [] 46 | for i in range(self.nclass): 47 | intersection = self.M[i, i] 48 | union = (np.sum(self.M[i, :]) + np.sum(self.M[:, i]) - self.M[i, i]) 49 | 50 | jaccard_perclass.append(intersection / union) 51 | 52 | return np.sum(jaccard_perclass) / len(jaccard_perclass), jaccard_perclass, self.M 53 | 54 | def generateM(self, item): 55 | gt, pred = item 56 | m = np.zeros((self.nclass, self.nclass)) 57 | assert (len(gt) == len(pred)) 58 | for i in range(len(gt)): 59 | if gt[i] < self.nclass: 60 | m[gt[i], pred[i]] += 1.0 61 | return m 62 | 63 | 64 | def _pickle_method(m): 65 | if m.im_self is None: 66 | return getattr, (m.im_class, m.im_func.func_name) 67 | else: 68 | return getattr, (m.im_self, m.im_func.func_name) 69 | 70 | 71 | def get_iou(data_list, class_num, save_path=None): 72 | ConfM = ConfusionMatrix(class_num) 73 | f = ConfM.generateM 74 | pool = Pool() 75 | m_list = pool.map(f, data_list) 76 | pool.close() 77 | pool.join() 78 | 79 | for m in m_list: 80 | ConfM.addM(m) 81 | 82 | aveJ, j_list, M = ConfM.jaccard() 83 | 84 | classes = np.array(('background', # always index 0 85 | 'aeroplane', 'bicycle', 'bird', 'boat', 86 | 'bottle', 'bus', 'car', 
'cat', 'chair', 87 | 'cow', 'diningtable', 'dog', 'horse', 88 | 'motorbike', 'person', 'pottedplant', 89 | 'sheep', 'sofa', 'train', 'tvmonitor')) 90 | 91 | for i, iou in enumerate(j_list): 92 | print('class {:2d} {:12} IU {:.2f}'.format(i, classes[i], j_list[i])) 93 | 94 | 95 | print('meanIOU: ' + str(aveJ) + '\n') 96 | if save_path: 97 | with open(save_path, 'w') as f: 98 | for i, iou in enumerate(j_list): 99 | f.write('class {:2d} {:12} IU {:.2f}'.format(i, classes[i], j_list[i]) + '\n') 100 | f.write('meanIOU: ' + str(aveJ) + '\n') 101 | -------------------------------------------------------------------------------- /utils/sync_batchnorm/replicate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : replicate.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import functools 12 | 13 | from torch.nn.parallel.data_parallel import DataParallel 14 | 15 | __all__ = [ 16 | 'CallbackContext', 17 | 'execute_replication_callbacks', 18 | 'DataParallelWithCallback', 19 | 'patch_replication_callback' 20 | ] 21 | 22 | 23 | class CallbackContext(object): 24 | pass 25 | 26 | 27 | def execute_replication_callbacks(modules): 28 | """ 29 | Execute an replication callback `__data_parallel_replicate__` on each module created by original replication. 30 | 31 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 32 | 33 | Note that, as all modules are isomorphism, we assign each sub-module with a context 34 | (shared among multiple copies of this module on different devices). 35 | Through this context, different copies can share some information. 36 | 37 | We guarantee that the callback on the master copy (the first copy) will be called ahead of calling the callback 38 | of any slave copies. 39 | """ 40 | master_copy = modules[0] 41 | nr_modules = len(list(master_copy.modules())) 42 | ctxs = [CallbackContext() for _ in range(nr_modules)] 43 | 44 | for i, module in enumerate(modules): 45 | for j, m in enumerate(module.modules()): 46 | if hasattr(m, '__data_parallel_replicate__'): 47 | m.__data_parallel_replicate__(ctxs[j], i) 48 | 49 | 50 | class DataParallelWithCallback(DataParallel): 51 | """ 52 | Data Parallel with a replication callback. 53 | 54 | An replication callback `__data_parallel_replicate__` of each module will be invoked after being created by 55 | original `replicate` function. 56 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 57 | 58 | Examples: 59 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 60 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 61 | # sync_bn.__data_parallel_replicate__ will be invoked. 62 | """ 63 | 64 | def replicate(self, module, device_ids): 65 | modules = super(DataParallelWithCallback, self).replicate(module, device_ids) 66 | execute_replication_callbacks(modules) 67 | return modules 68 | 69 | 70 | def patch_replication_callback(data_parallel): 71 | """ 72 | Monkey-patch an existing `DataParallel` object. Add the replication callback. 73 | Useful when you have customized `DataParallel` implementation. 
74 | 75 | Examples: 76 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 77 | > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) 78 | > patch_replication_callback(sync_bn) 79 | # this is equivalent to 80 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 81 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 82 | """ 83 | 84 | assert isinstance(data_parallel, DataParallel) 85 | 86 | old_replicate = data_parallel.replicate 87 | 88 | @functools.wraps(old_replicate) 89 | def new_replicate(module, device_ids): 90 | modules = old_replicate(module, device_ids) 91 | execute_replication_callbacks(modules) 92 | return modules 93 | 94 | data_parallel.replicate = new_replicate 95 | -------------------------------------------------------------------------------- /data/voc_dataset.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/hfslyc/AdvSemiSeg 3 | Slightly modified 4 | ''' 5 | 6 | import os.path as osp 7 | import numpy as np 8 | import random 9 | import torch 10 | import cv2 11 | from torch.utils import data 12 | 13 | class VOCDataSet(data.Dataset): 14 | def __init__(self, root, split="train", max_iters=None, crop_size=(321, 321), scale=True, mirror=True, ignore_label=255, pretraining='COCO'): 15 | self.root = root 16 | self.crop_h, self.crop_w = crop_size 17 | self.scale = scale 18 | self.pretraining = pretraining 19 | self.ignore_label = ignore_label 20 | self.is_mirror = mirror 21 | self.split=split 22 | if split == "train": 23 | list_path = './data/voc_list/train_aug.txt' 24 | elif split == "val": 25 | list_path = './data/voc_list/val.txt' 26 | self.img_ids = [i_id.strip() for i_id in open(list_path)] 27 | if not max_iters==None: 28 | self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids))) 29 | self.files = [] 30 | for name in self.img_ids: 31 | img_file = osp.join(self.root, "JPEGImages/%s.jpg" % name) 32 | label_file = osp.join(self.root, "SegmentationClassAug/%s.png" % name) 33 | self.files.append({ 34 | "img": img_file, 35 | "label": label_file, 36 | "name": name 37 | }) 38 | self.class_names = ['background', # always index 0 39 | 'aeroplane', 'bicycle', 'bird', 'boat', 40 | 'bottle', 'bus', 'car', 'cat', 'chair', 41 | 'cow', 'diningtable', 'dog', 'horse', 42 | 'motorbike', 'person', 'pottedplant', 43 | 'sheep', 'sofa', 'train', 'tvmonitor'] 44 | 45 | def __len__(self): 46 | return len(self.files) 47 | 48 | 49 | def __getitem__(self, index): 50 | datafiles = self.files[index] 51 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 52 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 53 | if self.pretraining == 'COCO': # if pratraining is not COCO, change to RGB 54 | image = image 55 | else: 56 | image = image[:, :, ::-1] 57 | 58 | size = image.shape 59 | name = datafiles["name"] 60 | image = np.asarray(image, np.float32) 61 | img_h, img_w = label.shape 62 | if "val" not in self.split: # output size with pad or crop 63 | pad_h = max(self.crop_h - img_h, 0) 64 | pad_w = max(self.crop_w - img_w, 0) 65 | if pad_h > 0 or pad_w > 0: 66 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 67 | pad_w, cv2.BORDER_CONSTANT, 68 | value=(0.0, 0.0, 0.0)) 69 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 70 | pad_w, cv2.BORDER_CONSTANT, 71 | value=(self.ignore_label,)) 72 | else: 73 | img_pad, label_pad = image, label 74 | 75 | img_h, img_w = label_pad.shape 76 | h_off = random.randint(0, img_h - self.crop_h) 77 | w_off = 
random.randint(0, img_w - self.crop_w) 78 | image = np.asarray(img_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 79 | label = np.asarray(label_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.int64) 80 | 81 | image = image.transpose((2, 0, 1)) 82 | label = label.astype(int) 83 | 84 | return torch.from_numpy(image).float(), torch.from_numpy(label).long(), np.array(size), name, index 85 | -------------------------------------------------------------------------------- /utils/class_balancing.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This class implements the curriculum class balancing. 4 | It implements a squared median frequency class balancing but taking both labeled and unlabeled data into account. 5 | Unlabeled data is taken into account using pseudolabels that are updated at every iteration 6 | 7 | """ 8 | 9 | 10 | import numpy as np 11 | 12 | 13 | class ClassBalancing: 14 | 15 | def __init__(self, labeled_iters, unlabeled_iters, n_classes=19): 16 | """ 17 | 18 | Args: 19 | labeled_iters: Number of iterations to fill up the memory of labeled statistics 20 | unlabeled_iters: Number of iterations to fill up the memory of unlabeled statistics 21 | n_classes: number of classes of the dataset 22 | """ 23 | self.labeled_samples = labeled_iters 24 | self.unlabeled_samples = unlabeled_iters 25 | self.n_classes = n_classes 26 | 27 | # build memory to store the statistcs of the labels for labeled and unlabeled data 28 | self.labeled_frequencies = np.zeros((labeled_iters, n_classes), dtype = np.long) 29 | self.unlabeled_frequencies = np.zeros((unlabeled_iters, n_classes), dtype = np.long) 30 | 31 | self.iter = 0 # iteration counter 32 | self.start_computing_iter = max(labeled_iters, unlabeled_iters) # number of iterations to take into account all statistics of the dataset 33 | 34 | 35 | def compute_frequencies(self, samples): 36 | """ 37 | 38 | Args: 39 | samples: BxWxH labels or pseudolabels 40 | 41 | Returns: computes per-class frequencies from the input labels 42 | 43 | """ 44 | freqs = np.zeros((self.n_classes)) 45 | for c in range(self.n_classes): 46 | mask_freq_c = (samples == c).astype(float) 47 | freqs[c] = mask_freq_c.sum() 48 | return freqs 49 | 50 | def add_frequencies(self, labeled_samples, unlabeled_samples): 51 | """ 52 | Given some labels and pseudolabels of an training iteration, add them to the statistics memories 53 | Args: 54 | labeled_samples: BxWxH labels 55 | unlabeled_samples: BxWxH pseudolabels 56 | 57 | 58 | """ 59 | 60 | if self.iter < self.labeled_samples: 61 | labeled_freqs = self.compute_frequencies(labeled_samples) 62 | self.labeled_frequencies[self.iter, :] = labeled_freqs 63 | 64 | unl_freqs = self.compute_frequencies(unlabeled_samples) 65 | 66 | if self.iter < self.unlabeled_samples: 67 | self.unlabeled_frequencies[self.iter, :] = unl_freqs 68 | else: # remove first, add this one at the bottom (concat) 69 | # only for unlabeled because labeled doesnot change once is filled 70 | self.unlabeled_frequencies = self.unlabeled_frequencies[1:, :] 71 | self.unlabeled_frequencies = np.concatenate((self.unlabeled_frequencies, np.expand_dims(unl_freqs, 0)), axis=0) 72 | 73 | self.iter += 1 74 | 75 | def add_frequencies_labeled(self, labeled_samples): 76 | 77 | if self.iter < self.labeled_samples: 78 | labeled_freqs = self.compute_frequencies(labeled_samples) 79 | self.labeled_frequencies[self.iter, :] = labeled_freqs 80 | 81 | 82 | def add_frequencies_unlabeled(self, 
unlabeled_samples): 83 | 84 | unl_freqs = self.compute_frequencies(unlabeled_samples) 85 | 86 | if self.iter < self.unlabeled_samples: 87 | self.unlabeled_frequencies[self.iter, :] = unl_freqs 88 | else: # remove first, add this one at the bottom (concat) 89 | # only for unlabeled because labeled doesnot change 90 | self.unlabeled_frequencies = self.unlabeled_frequencies[1:, :] 91 | self.unlabeled_frequencies = np.concatenate((self.unlabeled_frequencies, np.expand_dims(unl_freqs, 0)), axis=0) 92 | 93 | self.iter += 1 94 | 95 | 96 | 97 | def get_weights(self, max_iter, only_labeled=False): 98 | if self.iter < self.start_computing_iter: # do not compute weights until the memories are filled up 99 | return np.ones((self.n_classes)) 100 | else: # inverse median, frequency 101 | ratio_unlabeled = 1 # min (1., self.iter / max_iter) # weigth to give to the pseudolabels statistics 102 | freqs_labeled = np.sum(self.labeled_frequencies, axis = 0) 103 | freqs_unlabeled = np.sum(self.unlabeled_frequencies, axis = 0) 104 | if only_labeled: 105 | ratio_unlabeled = 0 106 | 107 | freqs = freqs_labeled + freqs_unlabeled * ratio_unlabeled 108 | 109 | median = np.median(freqs) 110 | weights = median / freqs 111 | 112 | # deal with classes with no samples 113 | mask_inf = np.isinf(weights) 114 | weights[mask_inf] = 1 115 | weights[mask_inf] = max(weights) 116 | 117 | return np.power(weights, 0.5) 118 | 119 | -------------------------------------------------------------------------------- /utils/sync_batchnorm/comm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : comm.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import queue 12 | import collections 13 | import threading 14 | 15 | __all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] 16 | 17 | 18 | class FutureResult(object): 19 | """A thread-safe future implementation. Used only as one-to-one pipe.""" 20 | 21 | def __init__(self): 22 | self._result = None 23 | self._lock = threading.Lock() 24 | self._cond = threading.Condition(self._lock) 25 | 26 | def put(self, result): 27 | with self._lock: 28 | assert self._result is None, 'Previous result has\'t been fetched.' 29 | self._result = result 30 | self._cond.notify() 31 | 32 | def get(self): 33 | with self._lock: 34 | if self._result is None: 35 | self._cond.wait() 36 | 37 | res = self._result 38 | self._result = None 39 | return res 40 | 41 | 42 | _MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) 43 | _SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result']) 44 | 45 | 46 | class SlavePipe(_SlavePipeBase): 47 | """Pipe for master-slave communication.""" 48 | 49 | def run_slave(self, msg): 50 | self.queue.put((self.identifier, msg)) 51 | ret = self.result.get() 52 | self.queue.put(True) 53 | return ret 54 | 55 | 56 | class SyncMaster(object): 57 | """An abstract `SyncMaster` object. 58 | 59 | - During the replication, as the data parallel will trigger an callback of each module, all slave devices should 60 | call `register(id)` and obtain an `SlavePipe` to communicate with the master. 61 | - During the forward pass, master device invokes `run_master`, all messages from slave devices will be collected, 62 | and passed to a registered callback. 
63 | - After receiving the messages, the master device should gather the information and determine to message passed 64 | back to each slave devices. 65 | """ 66 | 67 | def __init__(self, master_callback): 68 | """ 69 | 70 | Args: 71 | master_callback: a callback to be invoked after having collected messages from slave devices. 72 | """ 73 | self._master_callback = master_callback 74 | self._queue = queue.Queue() 75 | self._registry = collections.OrderedDict() 76 | self._activated = False 77 | 78 | def __getstate__(self): 79 | return {'master_callback': self._master_callback} 80 | 81 | def __setstate__(self, state): 82 | self.__init__(state['master_callback']) 83 | 84 | def register_slave(self, identifier): 85 | """ 86 | Register an slave device. 87 | 88 | Args: 89 | identifier: an identifier, usually is the device id. 90 | 91 | Returns: a `SlavePipe` object which can be used to communicate with the master device. 92 | 93 | """ 94 | if self._activated: 95 | assert self._queue.empty(), 'Queue is not clean before next initialization.' 96 | self._activated = False 97 | self._registry.clear() 98 | future = FutureResult() 99 | self._registry[identifier] = _MasterRegistry(future) 100 | return SlavePipe(identifier, self._queue, future) 101 | 102 | def run_master(self, master_msg): 103 | """ 104 | Main entry for the master device in each forward pass. 105 | The messages were first collected from each devices (including the master device), and then 106 | an callback will be invoked to compute the message to be sent back to each devices 107 | (including the master device). 108 | 109 | Args: 110 | master_msg: the message that the master want to send to itself. This will be placed as the first 111 | message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. 112 | 113 | Returns: the message to be sent back to the master device. 114 | 115 | """ 116 | self._activated = True 117 | 118 | intermediates = [(0, master_msg)] 119 | for i in range(self.nr_slaves): 120 | intermediates.append(self._queue.get()) 121 | 122 | results = self._master_callback(intermediates) 123 | assert results[0][0] == 0, 'The first result should belongs to the master.' 124 | 125 | for i, res in results: 126 | if i == 0: 127 | continue 128 | self._registry[i].result.put(res) 129 | 130 | for i in range(self.nr_slaves): 131 | assert self._queue.get() is True 132 | 133 | return results[0][1] 134 | 135 | @property 136 | def nr_slaves(self): 137 | return len(self._registry) 138 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | import numpy as np 6 | 7 | class CrossEntropy2d(nn.Module): 8 | 9 | def __init__(self, ignore_label=255): 10 | super(CrossEntropy2d, self).__init__() 11 | self.ignore_label = ignore_label 12 | 13 | def forward(self, predict, target, weight=None): 14 | """ 15 | Args: 16 | predict:(n, c, h, w) 17 | target:(n, h, w) 18 | weight (Tensor, optional): a manual rescaling weight given to each class. 
19 | If given, has to be a Tensor of size "nclasses" 20 | """ 21 | assert not target.requires_grad 22 | assert predict.dim() == 4 23 | assert target.dim() == 3 24 | n, c, h, w = predict.size() 25 | target_mask = (target >= 0) * (target != self.ignore_label) 26 | target = target[target_mask] 27 | if not target.data.dim(): 28 | return Variable(torch.zeros(1)) 29 | predict = predict.transpose(1, 2).transpose(2, 3).contiguous() 30 | predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c) 31 | loss = F.cross_entropy(predict, target, weight=weight, reduction='mean') 32 | return loss 33 | 34 | 35 | class CrossEntropyLoss2dPixelWiseWeighted(nn.Module): 36 | def __init__(self, weight=None, ignore_index=250, reduction='none'): 37 | super(CrossEntropyLoss2dPixelWiseWeighted, self).__init__() 38 | self.CE = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index, reduction=reduction) 39 | 40 | def forward(self, output, target, pixelWiseWeight): 41 | loss = self.CE(output, target) 42 | loss = torch.mean(loss * pixelWiseWeight) 43 | return loss 44 | 45 | class MSELoss2d(nn.Module): 46 | def __init__(self, size_average=None, reduce=None, reduction='mean', ignore_index=255): 47 | super(MSELoss2d, self).__init__() 48 | self.MSE = nn.MSELoss(size_average=size_average, reduce=reduce, reduction=reduction) 49 | 50 | def forward(self, output, target): 51 | loss = self.MSE(torch.softmax(output, dim=1), target) 52 | return loss 53 | 54 | def customsoftmax(inp, multihotmask): 55 | """ 56 | Custom Softmax 57 | """ 58 | soft = torch.softmax(inp,dim=1) 59 | # This takes the mask * softmax ( sums it up hence summing up the classes in border 60 | # then takes of summed up version vs no summed version 61 | return torch.log( 62 | torch.max(soft, (multihotmask * (soft * multihotmask).sum(1, keepdim=True))) 63 | ) 64 | 65 | class ImgWtLossSoftNLL(nn.Module): 66 | """ 67 | Relax Loss 68 | """ 69 | 70 | def __init__(self, classes, ignore_index=255, weights=None, upper_bound=1.0, 71 | norm=False): 72 | super(ImgWtLossSoftNLL, self).__init__() 73 | self.weights = weights 74 | self.num_classes = classes 75 | self.ignore_index = ignore_index 76 | self.upper_bound = upper_bound 77 | self.norm = norm 78 | self.batch_weights = False 79 | self.fp16 = False 80 | 81 | 82 | def calculate_weights(self, target): 83 | """ 84 | Calculate weights of the classes based on training crop 85 | """ 86 | if len(target.shape) == 3: 87 | hist = np.sum(target, axis=(1, 2)) * 1.0 / target.sum() 88 | else: 89 | hist = np.sum(target, axis=(0, 2, 3)) * 1.0 / target.sum() 90 | if self.norm: 91 | hist = ((hist != 0) * self.upper_bound * (1 / hist)) + 1 92 | else: 93 | hist = ((hist != 0) * self.upper_bound * (1 - hist)) + 1 94 | return hist[:] 95 | 96 | def custom_nll(self, inputs, target, class_weights, border_weights, mask): 97 | """ 98 | NLL Relaxed Loss Implementation 99 | """ 100 | #if (cfg.REDUCE_BORDER_EPOCH != -1 and cfg.EPOCH > cfg.REDUCE_BORDER_EPOCH): 101 | # border_weights = 1 / border_weights 102 | # target[target > 1] = 1 103 | if self.fp16: 104 | loss_matrix = (-1 / border_weights * 105 | (target[:, :, :, :].half() * 106 | class_weights.unsqueeze(0).unsqueeze(2).unsqueeze(3) * 107 | customsoftmax(inputs, target[:, :, :, :].half())).sum(1)) * \ 108 | (1. - mask.half()) 109 | else: 110 | loss_matrix = (-1 / border_weights * 111 | (target[:, :, :, :].float() * 112 | class_weights.unsqueeze(0).unsqueeze(2).unsqueeze(3) * 113 | customsoftmax(inputs, target[:, :, :, :].float())).sum(1)) * \ 114 | (1. 
- mask.float()) 115 | 116 | # loss_matrix[border_weights > 1] = 0 117 | loss = loss_matrix.sum() 118 | 119 | # +1 to prevent division by 0 120 | loss = loss / (target.shape[0] * target.shape[2] * target.shape[3] - mask.sum().item() + 1) 121 | return loss 122 | 123 | def forward(self, inputs, target): 124 | if self.fp16: 125 | weights = target[:, :, :, :].sum(1).half() 126 | else: 127 | weights = target[:, :, :, :].sum(1).float() 128 | ignore_mask = (weights == 0) 129 | weights[ignore_mask] = 1 130 | 131 | loss = 0 132 | target_cpu = target.data.cpu().numpy() 133 | 134 | if self.batch_weights: 135 | class_weights = self.calculate_weights(target_cpu) 136 | 137 | for i in range(0, inputs.shape[0]): 138 | if not self.batch_weights: 139 | class_weights = self.calculate_weights(target_cpu[i]) 140 | 141 | class_weights = torch.ones((class_weights.shape)) 142 | loss = loss + self.custom_nll(inputs[i].unsqueeze(0), 143 | target[i].unsqueeze(0), 144 | class_weights=torch.Tensor(class_weights).cuda(), 145 | border_weights=weights, mask=ignore_mask[i]) 146 | 147 | return loss 148 | -------------------------------------------------------------------------------- /data/gta5_loader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | Slightly modified 4 | ''' 5 | 6 | 7 | import os 8 | import torch 9 | import scipy.misc as m 10 | from torch.utils import data 11 | from data.city_utils import recursive_glob 12 | from data.augmentations import * 13 | 14 | class gtaLoader(data.Dataset): 15 | """cityscapesLoader 16 | https://www.cityscapes-dataset.com 17 | Data is derived from CityScapes, and can be downloaded from here: 18 | https://www.cityscapes-dataset.com/downloads/ 19 | Many Thanks to @fvisin for the loader repo: 20 | https://github.com/fvisin/dataset_loaders/blob/master/dataset_loaders/images/cityscapes.py 21 | """ 22 | 23 | colors = [ # [ 0, 0, 0], 24 | [128, 64, 128], 25 | [244, 35, 232], 26 | [70, 70, 70], 27 | [102, 102, 156], 28 | [190, 153, 153], 29 | [153, 153, 153], 30 | [250, 170, 30], 31 | [220, 220, 0], 32 | [107, 142, 35], 33 | [152, 251, 152], 34 | [0, 130, 180], 35 | [220, 20, 60], 36 | [255, 0, 0], 37 | [0, 0, 142], 38 | [0, 0, 70], 39 | [0, 60, 100], 40 | [0, 80, 100], 41 | [0, 0, 230], 42 | [119, 11, 32], 43 | ] 44 | 45 | label_colours = dict(zip(range(19), colors)) 46 | 47 | def __init__( 48 | self, 49 | root, 50 | split="train", 51 | is_transform=False, 52 | img_size=(512, 1024), 53 | img_norm=False, 54 | augmentations=None, 55 | version="cityscapes", 56 | pretraining='COCO', 57 | return_id=False, 58 | ): 59 | """__init__ 60 | :param root: 61 | :param split: 62 | :param is_transform: 63 | :param img_size: 64 | :param augmentations 65 | """ 66 | self.root = root 67 | self.split = split 68 | self.pretraining = pretraining 69 | self.is_transform = is_transform 70 | self.augmentations = augmentations 71 | self.img_norm = img_norm 72 | self.n_classes = 19 73 | self.img_size = ( 74 | img_size if isinstance(img_size, tuple) else (img_size, img_size) 75 | ) 76 | self.files = {} 77 | 78 | self.images_base = os.path.join(self.root, "images", self.split) 79 | 80 | self.annotations_base = os.path.join( 81 | self.root, "labels", self.split 82 | ) 83 | 84 | self.files[split] = recursive_glob(rootdir=self.images_base, suffix=".png") 85 | self.void_classes = [0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1] 86 | self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
31, 32, 33,] 87 | self.class_names = [ 88 | "unlabelled", 89 | "road", 90 | "sidewalk", 91 | "building", 92 | "wall", 93 | "fence", 94 | "pole", 95 | "traffic_light", 96 | "traffic_sign", 97 | "vegetation", 98 | "terrain", 99 | "sky", 100 | "person", 101 | "rider", 102 | "car", 103 | "truck", 104 | "bus", 105 | "train", 106 | "motorcycle", 107 | "bicycle", 108 | ] 109 | 110 | self.ignore_index = 250 111 | self.class_map = dict(zip(self.valid_classes, range(19))) 112 | 113 | if not self.files[split]: 114 | raise Exception( 115 | "No files for split=[%s] found in %s" % (split, self.images_base) 116 | ) 117 | 118 | print("Found %d %s images" % (len(self.files[split]), split)) 119 | 120 | self.return_id = return_id 121 | 122 | def __len__(self): 123 | """__len__""" 124 | return len(self.files[self.split]) 125 | 126 | def __getitem__(self, index): 127 | """__getitem__ 128 | :param index: 129 | """ 130 | img_path = self.files[self.split][index].rstrip() 131 | lbl_path = img_path.replace('images', 'labels') 132 | 133 | try: 134 | img = m.imread(img_path) 135 | img = np.array(img, dtype=np.uint8) 136 | 137 | lbl = m.imread(lbl_path) 138 | lbl = np.array(lbl, dtype=np.uint8) 139 | 140 | if self.augmentations is not None: 141 | img, lbl = self.augmentations(img, lbl) 142 | if self.is_transform: 143 | img, lbl = self.transform(img, lbl) 144 | 145 | img_name = img_path.split('/')[-1] 146 | if self.return_id: 147 | return img, lbl, img_name, img_name, index 148 | return img, lbl, img_path, lbl_path, img_name 149 | except: 150 | print(img_path) 151 | self.files[self.split].pop(index) 152 | return self.__getitem__(index - 1) 153 | 154 | 155 | def transform(self, img, lbl): 156 | """transform 157 | :param img: 158 | :param lbl: 159 | """ 160 | if self.pretraining == 'COCO': 161 | img = img[:, :, ::-1] 162 | img = img.astype(np.float64) 163 | # NHWC -> NCHW 164 | img = img.transpose(2, 0, 1) 165 | img = torch.from_numpy(img).float() 166 | lbl = torch.from_numpy(lbl).long() 167 | return img, lbl 168 | 169 | def decode_segmap(self, temp): 170 | r = temp.copy() 171 | g = temp.copy() 172 | b = temp.copy() 173 | for l in range(0, self.n_classes): 174 | r[temp == l] = self.label_colours[l][0] 175 | g[temp == l] = self.label_colours[l][1] 176 | b[temp == l] = self.label_colours[l][2] 177 | 178 | rgb = np.zeros((temp.shape[0], temp.shape[1], 3)) 179 | rgb[:, :, 0] = r / 255.0 180 | rgb[:, :, 1] = g / 255.0 181 | rgb[:, :, 2] = b / 255.0 182 | return rgb 183 | 184 | def encode_segmap(self, mask): 185 | # Put all void classes to zero 186 | for _voidc in self.void_classes: 187 | mask[mask == _voidc] = self.ignore_index 188 | for _validc in self.valid_classes: 189 | mask[mask == _validc] = self.class_map[_validc] 190 | return mask 191 | -------------------------------------------------------------------------------- /data/cityscapes_loader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | Slightly modified 4 | ''' 5 | 6 | import os 7 | import torch 8 | import scipy.misc as m 9 | from torch.utils import data 10 | from data.city_utils import recursive_glob 11 | from data.augmentations import * 12 | 13 | class cityscapesLoader(data.Dataset): 14 | """cityscapesLoader 15 | https://www.cityscapes-dataset.com 16 | Data is derived from CityScapes, and can be downloaded from here: 17 | https://www.cityscapes-dataset.com/downloads/ 18 | Many Thanks to @fvisin for the loader repo: 19 | 
https://github.com/fvisin/dataset_loaders/blob/master/dataset_loaders/images/cityscapes.py 20 | """ 21 | 22 | colors = [ # [ 0, 0, 0], 23 | [128, 64, 128], 24 | [244, 35, 232], 25 | [70, 70, 70], 26 | [102, 102, 156], 27 | [190, 153, 153], 28 | [153, 153, 153], 29 | [250, 170, 30], 30 | [220, 220, 0], 31 | [107, 142, 35], 32 | [152, 251, 152], 33 | [70, 130, 180], 34 | [220, 20, 60], 35 | [255, 0, 0], 36 | [0, 0, 142], 37 | [0, 0, 70], 38 | [0, 60, 100], 39 | [0, 80, 100], 40 | [0, 0, 230], 41 | [119, 11, 32], 42 | ] 43 | 44 | label_colours = dict(zip(range(19), colors)) 45 | 46 | def __init__( 47 | self, 48 | root, 49 | split="train", 50 | is_transform=False, 51 | img_size=(512, 1024), 52 | img_norm=False, 53 | augmentations=None, 54 | return_id=False, 55 | pretraining='COCO', 56 | ): 57 | """__init__ 58 | :param root: 59 | :param split: 60 | :param is_transform: 61 | :param img_size: 62 | :param augmentations 63 | """ 64 | self.root = root 65 | self.pretraining = pretraining 66 | self.split = split 67 | self.is_transform = is_transform 68 | self.augmentations = augmentations 69 | self.img_norm = img_norm 70 | self.n_classes = 19 71 | self.img_size = ( 72 | img_size if isinstance(img_size, tuple) else (img_size, img_size) 73 | ) 74 | self.files = {} 75 | 76 | self.images_base = os.path.join(self.root, "leftImg8bit_trainvaltest","leftImg8bit", self.split) 77 | 78 | self.annotations_base = os.path.join( 79 | self.root, "gtFine_trainvaltest", "gtFine", self.split 80 | ) 81 | 82 | self.files[split] = recursive_glob(rootdir=self.images_base, suffix=".png") 83 | self.void_classes = [0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1] 84 | self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33,] 85 | self.class_names = [ 86 | "unlabelled", 87 | "road", 88 | "sidewalk", 89 | "building", 90 | "wall", 91 | "fence", 92 | "pole", 93 | "traffic_light", 94 | "traffic_sign", 95 | "vegetation", 96 | "terrain", 97 | "sky", 98 | "person", 99 | "rider", 100 | "car", 101 | "truck", 102 | "bus", 103 | "train", 104 | "motorcycle", 105 | "bicycle", 106 | ] 107 | 108 | self.ignore_index = 250 109 | self.class_map = dict(zip(self.valid_classes, range(19))) 110 | 111 | if not self.files[split]: 112 | raise Exception( 113 | "No files for split=[%s] found in %s" % (split, self.images_base) 114 | ) 115 | 116 | print("Found %d %s images" % (len(self.files[split]), split)) 117 | 118 | self.return_id = return_id 119 | 120 | def __len__(self): 121 | """__len__""" 122 | return len(self.files[self.split]) 123 | 124 | def __getitem__(self, index): 125 | """__getitem__ 126 | :param index: 127 | """ 128 | img_path = self.files[self.split][index].rstrip() 129 | lbl_path = os.path.join( 130 | self.annotations_base, 131 | img_path.split(os.sep)[-2], # temporary for cross validation 132 | os.path.basename(img_path)[:-15] + "gtFine_labelIds.png", 133 | ) 134 | try: 135 | img = m.imread(img_path) 136 | img = np.array(img, dtype=np.uint8) 137 | except: 138 | print(img_path) 139 | 140 | lbl = m.imread(lbl_path) 141 | lbl = np.array(lbl, dtype=np.uint8) 142 | lbl = self.encode_segmap(lbl) 143 | 144 | 145 | if self.augmentations is not None: 146 | img, lbl = self.augmentations(img, lbl) 147 | if self.is_transform: 148 | img, lbl = self.transform(img, lbl) 149 | 150 | img_name = img_path.split('/')[-1] 151 | if self.return_id: 152 | return img, lbl, img_name, img_name, index 153 | return img, lbl, img_path, lbl_path, img_name 154 | 155 | def transform(self, img, lbl): 156 | """transform 157 | 
:param img: 158 | :param lbl: 159 | """ 160 | if self.pretraining == 'COCO': 161 | img = img[:, :, ::-1] 162 | img = img.astype(np.float64) 163 | # NHWC -> NCHW 164 | img = img.transpose(2, 0, 1) 165 | img = torch.from_numpy(img).float() 166 | lbl = torch.from_numpy(lbl).long() 167 | return img, lbl 168 | 169 | def decode_segmap(self, temp): 170 | r = temp.copy() 171 | g = temp.copy() 172 | b = temp.copy() 173 | for l in range(0, self.n_classes): 174 | r[temp == l] = self.label_colours[l][0] 175 | g[temp == l] = self.label_colours[l][1] 176 | b[temp == l] = self.label_colours[l][2] 177 | 178 | rgb = np.zeros((temp.shape[0], temp.shape[1], 3)) 179 | rgb[:, :, 0] = r / 255.0 180 | rgb[:, :, 1] = g / 255.0 181 | rgb[:, :, 2] = b / 255.0 182 | return rgb 183 | 184 | def encode_segmap(self, mask): 185 | # Put all void classes to zero 186 | for _voidc in self.void_classes: 187 | mask[mask == _voidc] = self.ignore_index 188 | for _validc in self.valid_classes: 189 | mask[mask == _validc] = self.class_map[_validc] 190 | return mask 191 | -------------------------------------------------------------------------------- /evaluateSSL.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | Slightly modified 4 | ''' 5 | 6 | import argparse 7 | from data.augmentations import * 8 | from utils.metric import ConfusionMatrix 9 | from multiprocessing import Pool 10 | 11 | from torch.autograd import Variable 12 | from torch.utils import data 13 | import torch 14 | from data import get_data_path, get_loader 15 | from utils.loss import CrossEntropy2d 16 | 17 | 18 | def get_arguments(): 19 | """Parse all the arguments provided from the CLI. 20 | 21 | Returns: 22 | A list of parsed arguments. 
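    Example invocation (the checkpoint path is illustrative):
        python3 evaluateSSL.py --model-path ../saved/DeepLab/best_model.pth --save-output-images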
23 | """ 24 | parser = argparse.ArgumentParser(description="SSL evaluation script") 25 | parser.add_argument("-m", "--model-path", type=str, default=None, required=True, 26 | help="Model to evaluate") 27 | parser.add_argument("--gpu", type=int, default=(0,), 28 | help="choose gpu device.") 29 | parser.add_argument("--save-output-images", action="store_true", 30 | help="save output images") 31 | return parser.parse_args() 32 | 33 | 34 | 35 | def get_iou(confM, dataset): 36 | aveJ, j_list, M = confM.jaccard() 37 | 38 | if dataset == 'pascal_voc': 39 | classes = np.array(('background', # always index 0 40 | 'aeroplane', 'bicycle', 'bird', 'boat', 41 | 'bottle', 'bus', 'car', 'cat', 'chair', 42 | 'cow', 'diningtable', 'dog', 'horse', 43 | 'motorbike', 'person', 'pottedplant', 44 | 'sheep', 'sofa', 'train', 'tvmonitor')) 45 | elif dataset == 'cityscapes': 46 | classes = np.array(("road", "sidewalk", 47 | "building", "wall", "fence", "pole", 48 | "traffic_light", "traffic_sign", "vegetation", 49 | "terrain", "sky", "person", "rider", 50 | "car", "truck", "bus", 51 | "train", "motorcycle", "bicycle")) 52 | 53 | for i, iou in enumerate(j_list): 54 | print('class {:2d} {:12} IU {:.4f}'.format(i, classes[i], j_list[i])) 55 | 56 | print('meanIOU: ' + str(aveJ) + '\n') 57 | 58 | return aveJ 59 | 60 | 61 | def evaluate(model, dataset, deeplabv2=True, ignore_label=250, save_dir=None, pretraining='COCO'): 62 | model.eval() 63 | if pretraining == 'COCO': 64 | from utils.transformsgpu import normalize_bgr as normalize 65 | else: 66 | from utils.transformsgpu import normalize_rgb as normalize 67 | 68 | if dataset == 'pascal_voc': 69 | num_classes = 21 70 | data_loader = get_loader(dataset) 71 | data_path = get_data_path(dataset) 72 | test_dataset = data_loader(data_path, split="val", scale=False, mirror=False, pretraining=pretraining) 73 | testloader = data.DataLoader(test_dataset, batch_size=1, shuffle=False, pin_memory=True) 74 | 75 | elif dataset == 'cityscapes': 76 | num_classes = 19 77 | data_loader = get_loader('cityscapes') 78 | data_path = get_data_path('cityscapes') 79 | if deeplabv2: 80 | data_aug = Compose([Resize_city()]) 81 | else: # for deeplabv3 oirginal resolution 82 | data_aug = Compose([Resize_city_highres()]) 83 | 84 | test_dataset = data_loader(data_path, is_transform=True, split='val', 85 | augmentations=data_aug, pretraining=pretraining) 86 | testloader = data.DataLoader(test_dataset, batch_size=1, shuffle=False, pin_memory=True) 87 | 88 | print('Evaluating, found ' + str(len(testloader)) + ' images.') 89 | confM = ConfusionMatrix(num_classes) 90 | 91 | 92 | data_list = [] 93 | total_loss = [] 94 | 95 | for index, batch in enumerate(testloader): 96 | image, label, size, name, _ = batch 97 | 98 | with torch.no_grad(): 99 | interp = torch.nn.Upsample(size=(label.shape[1], label.shape[2]), mode='bilinear', align_corners=True) 100 | output = model(normalize(Variable(image).cuda(), dataset)) 101 | output = interp(output) 102 | 103 | label_cuda = Variable(label.long()).cuda() 104 | criterion = CrossEntropy2d(ignore_label=ignore_label).cuda() 105 | loss = criterion(output, label_cuda) 106 | total_loss.append(loss.item()) 107 | 108 | output = output.cpu().data[0].numpy() 109 | gt = np.asarray(label[0].numpy(), dtype=np.int) 110 | 111 | output = np.asarray(np.argmax(output, axis=0), dtype=np.int) 112 | data_list.append((np.reshape(gt, (-1)), np.reshape(output, (-1)))) 113 | 114 | # filename = 'output_images/' + name[0].split('/')[-1] 115 | # cv2.imwrite(filename, output) 116 | 117 | if (index + 
1) % 100 == 0: 118 | # print('%d processed' % (index + 1)) 119 | process_list_evaluation(confM, data_list) 120 | data_list = [] 121 | 122 | process_list_evaluation(confM, data_list) 123 | 124 | mIoU = get_iou(confM, dataset) 125 | loss = np.mean(total_loss) 126 | return mIoU, loss 127 | 128 | 129 | def process_list_evaluation(confM, data_list): 130 | if len(data_list) > 0: 131 | f = confM.generateM 132 | pool = Pool(4) 133 | m_list = pool.map(f, data_list) 134 | pool.close() 135 | pool.join() 136 | pool.terminate() 137 | 138 | for m in m_list: 139 | confM.addM(m) 140 | 141 | 142 | 143 | def main(): 144 | """Create the model and start the evaluation process.""" 145 | 146 | deeplabv2 = "2" in config['version'] 147 | 148 | if deeplabv2: 149 | if pretraining == 'COCO': # COCO and imagenet resnet architectures differ slightly, only in how the stride is performed 150 | from model.deeplabv2 import Res_Deeplab 151 | else: # imagenet pretrained (more modern modification) 152 | from model.deeplabv2_imagenet import Res_Deeplab 153 | 154 | else: 155 | from model.deeplabv3 import Res_Deeplab 156 | 157 | model = Res_Deeplab(num_classes=num_classes) 158 | 159 | checkpoint = torch.load(args.model_path) 160 | model.load_state_dict(checkpoint['model']) 161 | 162 | model = model.cuda() 163 | model.eval() 164 | 165 | evaluate(model, dataset, deeplabv2=deeplabv2, ignore_label=ignore_label, pretraining=pretraining) 166 | 167 | 168 | if __name__ == '__main__': 169 | args = get_arguments() 170 | 171 | config = torch.load(args.model_path)['config'] 172 | 173 | dataset = config['dataset'] 174 | 175 | if dataset == 'cityscapes': 176 | num_classes = 19 177 | elif dataset == 'pascal_voc': 178 | num_classes = 21 179 | 180 | ignore_label = config['ignore_label'] 181 | 182 | pretraining = 'COCO' 183 | 184 | 185 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repository provides the official code for replicating experiments from the paper: 2 | **Semi-Supervised Semantic Segmentation with Pixel-Level Contrastive Learning from a Class-wise Memory Bank**, which has been accepted as an **oral** paper at the IEEE International Conference on Computer Vision (**ICCV**) 2021. 3 | 4 | This code is based on [ClassMix code](https://github.com/WilhelmT/ClassMix) 5 | 6 | # Semi-Supervised Semantic Segmentation with Pixel-Level Contrastive Learning from a Class-wise Memory Bank 7 | 8 | ## Prerequisites 9 | * CUDA/CUDNN 10 | * Python3 11 | * Packages found in requirements.txt 12 | 13 | 14 | ## Contact 15 | 16 | 17 | For any question, please either open a GitHub issue or contact via email to: 18 | 19 | ## Datasets 20 | 21 | Create a folder outside the code folder: 22 | ``` 23 | mkdir ../data/ 24 | ``` 25 | 26 | ### Cityscapes 27 | ``` 28 | mkdir ../data/CityScapes/ 29 | ``` 30 | Download the dataset from ([Link](https://www.cityscapes-dataset.com/)). 31 | 32 | Download the files named 'gtFine_trainvaltest.zip' and 'leftImg8bit_trainvaltest.zip' and extract them in ../data/Cityscapes/ 33 | 34 | ### Pascal VOC 2012 35 | ``` 36 | mkdir ../data/VOC2012/ 37 | ``` 38 | Download the dataset from ([Link](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/)). 39 | 40 | Download the file 'training/validation data' under 'Development kit' and extract it in ../data/VOC2012/ 41 | 42 | ### GTA5 43 | ``` 44 | mkdir ../data/GTA5/ 45 | ``` 46 | Download the dataset from ([Link](https://download.visinf.tu-darmstadt.de/data/from_games/)).
47 | Unzip all the dataset parts to create a structure like this: 48 | ``` 49 | ../data/GTA5/images/val/*.png 50 | ../data/GTA5/images/train/*.png 51 | ../data/GTA5/labels/val/*.png 52 | ../data/GTA5/labels/train/*.png 53 | ``` 54 | 55 | Then, reformat the label images from colored images to training ids. 56 | For that, execute this: 57 | ``` 58 | python3 utils/translate_labels.py 59 | ``` 60 | 61 | ## Experiments 62 | 63 | Here are some examples for replicating the experiments from the paper. 64 | Implementation details are specified in the paper (section 4.2); **any modification** could potentially affect the final result. 65 | 66 | 67 | ### Semi-Supervised 68 | Search here for the desired configuration: 69 | ``` 70 | ls ./configs/ 71 | ``` 72 | For example, for this configuration: 73 | * Dataset: CityScapes 74 | * % of labels: 1/30 75 | * Pretrain: COCO 76 | * Split: 0 77 | * Network: Deeplabv2 78 | 79 | Execute: 80 | 81 | ``` 82 | python3 trainSSL.py --config ./configs/configSSL_city_1_30_split0_COCO.json 83 | ``` 84 | 85 | Another example, for this configuration: 86 | * Dataset: CityScapes 87 | * % of labels: 1/30 88 | * Pretrain: imagenet 89 | * Split: 0 90 | * Network: Deeplabv3+ 91 | 92 | Execute: 93 | 94 | ``` 95 | python3 trainSSL.py --config ./configs/configSSL_city_1_30_split0_v3.json 96 | ``` 97 | 98 | 99 | For example, for this configuration: 100 | * Dataset: PASCAL VOC 101 | * % of labels: 1/50 102 | * Pretrain: COCO 103 | * Split: 0 104 | 105 | Execute: 106 | 107 | ``` 108 | python3 trainSSL.py --config ./configs/configSSL_pascal_1_50_split0_COCO.json 109 | ``` 110 | 111 | For replicating the paper experiments, just execute the training of the specific set-up to replicate. We already provide all the configuration files used in the paper. For modifying them, and for a detailed description of all the parameters in the configuration files, check this example: 112 | 113 | #### Configuration File Description 114 | ``` 115 | { 116 | "model": "DeepLab", # Network architecture. Options: Deeplab 117 | "version": "2", # Version of the network architecture. Options: {2, 3} for deeplabv2 and deeplabv3+ 118 | "dataset": "cityscapes", # Dataset to use. Options: {"cityscapes", "pascal"} 119 | 120 | "training": { 121 | "batch_size": 5, # Batch size to use. Options: any integer 122 | "num_workers": 3, # Number of cpu workers (threads) to use for loading the dataset. Options: any integer 123 | "optimizer": "SGD", # Optimizer to use. Options: {"SGD"} 124 | "momentum": 0.9, # Momentum for the SGD optimizer. Options: any float 125 | "num_iterations": 100000, # Number of iterations to train. Options: any integer 126 | "learning_rate": 2e-4, # Learning rate. Options: any float 127 | "lr_schedule": "Poly", # Decay scheduler for the learning rate. Options: {"Poly"} 128 | "lr_schedule_power": 0.9, # Power value for the Poly scheduler. Options: any float 129 | "pretraining": "COCO", # Pretraining to use. Options: {"COCO", "imagenet"} 130 | "weight_decay": 5e-4, # Weight decay. Options: any float 131 | "use_teacher_train": true, # Whether to use the teacher network to generate pseudolabels. Use student otherwise. Options: boolean. 132 | "save_teacher_test": false, # Whether to save the teacher network as the model for testing. Use student otherwise. Options: boolean. 133 | 134 | "data": { 135 | "split_id_list": 0, # Data splits to use. Options: {0, 1, 2} for pre-computed splits. N > 2 for random splits 136 | "labeled_samples": 744, # Number of labeled samples to use for supervised learning.
The rest will be used without labels. Options: any integer 137 | "input_size": "512,512" # Image crop size. Options: any integer tuple 138 | } 139 | 140 | }, 141 | "seed": 5555, # Seed for randomization. Options: any integer 142 | "ignore_label": 250, # Ignore label value. Options: any integer 143 | 144 | "utils": { 145 | "save_checkpoint_every": 10000, # The model will be saved every this many iterations. Options: any integer 146 | "checkpoint_dir": "../saved/DeepLab", # Path to save the models. Options: any path 147 | "val_per_iter": 1000, # The model will be evaluated every this many iterations. Options: any integer 148 | "save_best_model": true # Whether to also save the best evaluated model. Options: boolean 149 | } 150 | } 151 | ``` 152 | 153 | 154 | ### Memory Restrictions 155 | 156 | All experiments have been run on an NVIDIA Tesla V100. To fit the training in a smaller GPU, try these tips: 157 | 158 | * Reduce batch_size in the configuration file 159 | * Reduce input_size in the configuration file 160 | * Instead of trainSSL.py, use trainSSL_less_memory.py, which optimizes the labeled and unlabeled data in separate steps. 161 | 162 | 163 | 164 | For example, for this configuration: 165 | * Dataset: PASCAL VOC 166 | * % of labels: 1/50 167 | * Pretrain: COCO 168 | * Split: 0 169 | * Batch size: 8 170 | * Crop size: 256x256 171 | Execute: 172 | 173 | ``` 174 | python3 trainSSL_less_memory.py --config ./configs/configSSL_pascal_1_50_split2_COCO_reduced.json 175 | ``` 176 | 177 | 178 | ### Semi-Supervised Domain Adaptation 179 | 180 | Experiments for domain adaptation from the GTA5 dataset to Cityscapes. 181 | 182 | For example, for this configuration: 183 | * % of labels: 1/30 184 | * Pretrain: Imagenet 185 | * Split: 0 186 | 187 | Execute: 188 | ``` 189 | python3 trainSSL_domain_adaptation_targetCity.py --config ./configs/configSSL_city_1_30_split0_imagenet.json 190 | ``` 191 | 192 | ### Evaluation 193 | The training code will evaluate the model every certain number of iterations (set with the parameter val_per_iter in the configuration file). 194 | 195 | The best evaluated model will be printed at the end of training. 196 | 197 | For every training, several weights will be saved under the path specified in the parameter checkpoint_dir of the configuration file. 198 | 199 | One model every save_checkpoint_every iterations (see configuration file) will be saved, plus the best evaluated model. 200 | 201 | So, once the model has trained, we already know its performance. 202 | 203 | For a later evaluation, just execute the following command, specifying the model to evaluate in the model-path argument: 204 | ``` 205 | python3 evaluateSSL.py --model-path ../saved/DeepLab/best.pth 206 | ``` 207 | 208 | 209 | ### Citation 210 | If you find this work useful, please consider citing: 211 | ```BibTeX 212 | @inproceedings{alonso2021semi, 213 | title={Semi-Supervised Semantic Segmentation with Pixel-Level Contrastive Learning from a Class-wise Memory Bank}, 214 | author={Alonso, I{\~n}igo and Sabater, Alberto and Ferstl, David and Montesano, Luis and Murillo, Ana C}, 215 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 216 | year={2021} 217 | } 218 | ``` 219 | 220 | # License 221 | This code is released under the Apache 2.0 license. Please see the [LICENSE](LICENSE) file for more information.
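As a complement to the Evaluation section above, the evaluation can also be launched from Python. The snippet below is only a minimal sketch that mirrors what `main()` in `evaluateSSL.py` does for a COCO-pretrained DeepLabv2 checkpoint; the checkpoint path is a placeholder and must point to a model saved by the training scripts.

```python
# Minimal programmatic evaluation sketch (mirrors main() in evaluateSSL.py).
import torch
from evaluateSSL import evaluate
from model.deeplabv2 import Res_Deeplab  # COCO-pretrained DeepLabv2 variant

model_path = '../saved/DeepLab/best.pth'   # placeholder checkpoint path
checkpoint = torch.load(model_path)
config = checkpoint['config']              # the training config is stored inside the checkpoint

dataset = config['dataset']                # 'cityscapes' or 'pascal_voc'
num_classes = 19 if dataset == 'cityscapes' else 21

model = Res_Deeplab(num_classes=num_classes)
model.load_state_dict(checkpoint['model'])
model = model.cuda()
model.eval()

# evaluate() returns the mean IoU and the validation cross-entropy loss
mIoU, val_loss = evaluate(model, dataset, deeplabv2=True,
                          ignore_label=config['ignore_label'], pretraining='COCO')
print('mIoU:', mIoU, 'val loss:', val_loss)
```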
222 | -------------------------------------------------------------------------------- /utils/palette.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | ''' 4 | 5 | def get_voc_palette(num_classes): 6 | n = num_classes 7 | palette = [0]*(n*3) 8 | for j in range(0,n): 9 | lab = j 10 | palette[j*3+0] = 0 11 | palette[j*3+1] = 0 12 | palette[j*3+2] = 0 13 | i = 0 14 | while (lab > 0): 15 | palette[j*3+0] |= (((lab >> 0) & 1) << (7-i)) 16 | palette[j*3+1] |= (((lab >> 1) & 1) << (7-i)) 17 | palette[j*3+2] |= (((lab >> 2) & 1) << (7-i)) 18 | i = i + 1 19 | lab >>= 3 20 | return palette 21 | 22 | ADE20K_palette = [0,0,0,120,120,120,180,120,120,6,230,230,80,50,50,4,200, 23 | 3,120,120,80,140,140,140,204,5,255,230,230,230,4,250,7,224, 24 | 5,255,235,255,7,150,5,61,120,120,70,8,255,51,255,6,82,143, 25 | 255,140,204,255,4,255,51,7,204,70,3,0,102,200,61,230,250,255, 26 | 6,51,11,102,255,255,7,71,255,9,224,9,7,230,220,220,220,255,9, 27 | 92,112,9,255,8,255,214,7,255,224,255,184,6,10,255,71,255,41, 28 | 10,7,255,255,224,255,8,102,8,255,255,61,6,255,194,7,255,122,8, 29 | 0,255,20,255,8,41,255,5,153,6,51,255,235,12,255,160,150,20,0, 30 | 163,255,140,140,140,250,10,15,20,255,0,31,255,0,255,31,0,255,224 31 | ,0,153,255,0,0,0,255,255,71,0,0,235,255,0,173,255,31,0,255,11,200, 32 | 200,255,82,0,0,255,245,0,61,255,0,255,112,0,255,133,255,0,0,255, 33 | 163,0,255,102,0,194,255,0,0,143,255,51,255,0,0,82,255,0,255,41,0, 34 | 255,173,10,0,255,173,255,0,0,255,153,255,92,0,255,0,255,255,0,245, 35 | 255,0,102,255,173,0,255,0,20,255,184,184,0,31,255,0,255,61,0,71,255, 36 | 255,0,204,0,255,194,0,255,82,0,10,255,0,112,255,51,0,255,0,194,255,0, 37 | 122,255,0,255,163,255,153,0,0,255,10,255,112,0,143,255,0,82,0,255,163, 38 | 255,0,255,235,0,8,184,170,133,0,255,0,255,92,184,0,255,255,0,31,0,184, 39 | 255,0,214,255,255,0,112,92,255,0,0,224,255,112,224,255,70,184,160,163, 40 | 0,255,153,0,255,71,255,0,255,0,163,255,204,0,255,0,143,0,255,235,133,255, 41 | 0,255,0,235,245,0,255,255,0,122,255,245,0,10,190,212,214,255,0,0,204,255, 42 | 20,0,255,255,255,0,0,153,255,0,41,255,0,255,204,41,0,255,41,255,0,173,0, 43 | 255,0,245,255,71,0,255,122,0,255,0,255,184,0,92,255,184,255,0,0,133,255, 44 | 255,214,0,25,194,194,102,255,0,92,0,255] 45 | 46 | CityScpates_palette = [128,64,128,244,35,232,70,70,70,102,102,156,190,153,153,153,153,153, 47 | 250,170,30,220,220,0,107,142,35,152,251,152,70,130,180,220,20,60,255,0,0,0,0,142, 48 | 0,0,70,0,60,100,0,80,100,0,0,230,119,11,32,128,192,0,0,64,128,128,64,128,0,192, 49 | 128,128,192,128,64,64,0,192,64,0,64,192,0,192,192,0,64,64,128,192,64,128,64,192, 50 | 128,192,192,128,0,0,64,128,0,64,0,128,64,128,128,64,0,0,192,128,0,192,0,128,192, 51 | 128,128,192,64,0,64,192,0,64,64,128,64,192,128,64,64,0,192,192,0,192,64,128,192, 52 | 192,128,192,0,64,64,128,64,64,0,192,64,128,192,64,0,64,192,128,64,192,0,192,192, 53 | 128,192,192,64,64,64,192,64,64,64,192,64,192,192,64,64,64,192,192,64,192,64,192, 54 | 192,192,192,192,32,0,0,160,0,0,32,128,0,160,128,0,32,0,128,160,0,128,32,128,128, 55 | 160,128,128,96,0,0,224,0,0,96,128,0,224,128,0,96,0,128,224,0,128,96,128,128,224, 56 | 128,128,32,64,0,160,64,0,32,192,0,160,192,0,32,64,128,160,64,128,32,192,128,160, 57 | 192,128,96,64,0,224,64,0,96,192,0,224,192,0,96,64,128,224,64,128,96,192,128,224, 58 | 192,128,32,0,64,160,0,64,32,128,64,160,128,64,32,0,192,160,0,192,32,128,192,160, 59 | 128,192,96,0,64,224,0,64,96,128,64,224,128,64,96,0,192,224,0,192,96,128,192,224, 60 | 
128,192,32,64,64,160,64,64,32,192,64,160,192,64,32,64,192,160,64,192,32,192,192, 61 | 160,192,192,96,64,64,224,64,64,96,192,64,224,192,64,96,64,192,224,64,192,96,192, 62 | 192,224,192,192,0,32,0,128,32,0,0,160,0,128,160,0,0,32,128,128,32,128,0,160,128, 63 | 128,160,128,64,32,0,192,32,0,64,160,0,192,160,0,64,32,128,192,32,128,64,160,128, 64 | 192,160,128,0,96,0,128,96,0,0,224,0,128,224,0,0,96,128,128,96,128,0,224,128,128, 65 | 224,128,64,96,0,192,96,0,64,224,0,192,224,0,64,96,128,192,96,128,64,224,128,192, 66 | 224,128,0,32,64,128,32,64,0,160,64,128,160,64,0,32,192,128,32,192,0,160,192,128, 67 | 160,192,64,32,64,192,32,64,64,160,64,192,160,64,64,32,192,192,32,192,64,160,192, 68 | 192,160,192,0,96,64,128,96,64,0,224,64,128,224,64,0,96,192,128,96,192,0,224,192, 69 | 128,224,192,64,96,64,192,96,64,64,224,64,192,224,64,64,96,192,192,96,192,64,224, 70 | 192,192,224,192,32,32,0,160,32,0,32,160,0,160,160,0,32,32,128,160,32,128,32,160, 71 | 128,160,160,128,96,32,0,224,32,0,96,160,0,224,160,0,96,32,128,224,32,128,96,160, 72 | 128,224,160,128,32,96,0,160,96,0,32,224,0,160,224,0,32,96,128,160,96,128,32,224, 73 | 128,160,224,128,96,96,0,224,96,0,96,224,0,224,224,0,96,96,128,224,96,128,96,224, 74 | 128,224,224,128,32,32,64,160,32,64,32,160,64,160,160,64,32,32,192,160,32,192,32, 75 | 160,192,160,160,192,96,32,64,224,32,64,96,160,64,224,160,64,96,32,192,224,32,192, 76 | 96,160,192,224,160,192,32,96,64,160,96,64,32,224,64,160,224,64,32,96,192,160,96, 77 | 192,32,224,192,160,224,192,96,96,64,224,96,64,96,224,64,224,224,64,96,96,192,224, 78 | 96,192,96,224,192,0,0,0] 79 | 80 | 81 | COCO_palette = [31, 119, 180, 255, 127, 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 82 | 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 160, 44, 83 | 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 84 | 31, 119, 180, 255, 127, 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 85 | 227, 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 160, 44, 86 | 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 87 | 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 88 | 227, 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 89 | 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 90 | 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 160, 44, 214, 39, 40, 148, 103, 91 | 189, 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 92 | 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 127, 127 93 | , 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 160, 44, 214, 39, 40, 148, 103, 94 | 189, 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 95 | 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 127, 127, 96 | 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 97 | 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 98 | 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 99 | 207, 31, 119, 180, 255, 127, 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 100 | 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 
160, 44, 214, 39, 40, 148, 101 | 103, 189, 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 102 | 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 34, 103 | 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 104 | 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 14, 44, 160, 44, 214, 39, 105 | 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 127, 127, 127, 188, 189, 34, 23, 190, 207, 31, 119, 106 | 180, 255, 127, 14, 44, 160, 44, 214, 39, 40, 148, 103, 189, 140, 86, 75, 227, 119, 194, 127, 127, 107 | 127, 188, 189, 34, 23, 190, 207, 31, 119, 180, 255, 127, 14] -------------------------------------------------------------------------------- /model/deeplabv2_imagenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the implementation of DeepLabv2 without multi-scale inputs. This implementation uses ResNet-101 as backbone. 3 | 4 | This deeplab is used with imagenet pretraining to match the current pytorch implementation that provides these weights. 5 | This implementation follows the new implementation of Resnet bottleneck module where the stride is performed in the 3x3 conv. 6 | 7 | Code taken from https://github.com/WilhelmT/ClassMix 8 | Slightly modified 9 | """ 10 | 11 | import torch.nn as nn 12 | import numpy as np 13 | affine_par = True 14 | 15 | 16 | def outS(i): 17 | i = int(i) 18 | i = (i+1)/2 19 | i = int(np.ceil((i+1)/2.0)) 20 | i = (i+1)/2 21 | return i 22 | 23 | def conv3x3(in_planes, out_planes, stride=1): 24 | "3x3 convolution with padding" 25 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False) 27 | 28 | 29 | class Bottleneck(nn.Module): 30 | expansion = 4 31 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None): 32 | super(Bottleneck, self).__init__() 33 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) # change 34 | self.bn1 = nn.BatchNorm2d(planes,affine = affine_par) 35 | for i in self.bn1.parameters(): 36 | i.requires_grad = False 37 | 38 | padding = dilation 39 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, # change 40 | padding=padding, bias=False, dilation = dilation) 41 | self.bn2 = nn.BatchNorm2d(planes,affine = affine_par) 42 | for i in self.bn2.parameters(): 43 | i.requires_grad = False 44 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 45 | self.bn3 = nn.BatchNorm2d(planes * 4, affine = affine_par) 46 | for i in self.bn3.parameters(): 47 | i.requires_grad = False 48 | self.relu = nn.ReLU(inplace=True) 49 | self.downsample = downsample 50 | self.stride = stride 51 | 52 | 53 | def forward(self, x): 54 | residual = x 55 | 56 | out = self.conv1(x) 57 | out = self.bn1(out) 58 | out = self.relu(out) 59 | 60 | out = self.conv2(out) 61 | out = self.bn2(out) 62 | out = self.relu(out) 63 | 64 | out = self.conv3(out) 65 | out = self.bn3(out) 66 | 67 | if self.downsample is not None: 68 | residual = self.downsample(x) 69 | 70 | out += residual 71 | out = self.relu(out) 72 | 73 | return out 74 | 75 | class Classifier_Module(nn.Module): 76 | 77 | def __init__(self, dilation_series, padding_series, num_classes): 78 | super(Classifier_Module, self).__init__() 79 | self.conv2d_list = nn.ModuleList() 80 | for dilation, padding in zip(dilation_series, padding_series): 81 | self.conv2d_list.append(nn.Conv2d(2048, 
num_classes, kernel_size=3, stride=1, padding=padding, dilation=dilation, bias = True)) 82 | 83 | for m in self.conv2d_list: 84 | m.weight.data.normal_(0, 0.01) 85 | 86 | def forward(self, x): 87 | out = self.conv2d_list[0](x) 88 | for i in range(len(self.conv2d_list)-1): 89 | out += self.conv2d_list[i+1](x) 90 | return out 91 | 92 | 93 | 94 | class ResNet(nn.Module): 95 | def __init__(self, block, layers, num_classes): 96 | self.inplanes = 64 97 | super(ResNet, self).__init__() 98 | self.num_classes= num_classes 99 | 100 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 101 | bias=False) 102 | self.bn1 = nn.BatchNorm2d(64, affine = affine_par) 103 | for i in self.bn1.parameters(): 104 | i.requires_grad = False 105 | self.relu = nn.ReLU(inplace=True) 106 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1,) # change 107 | self.layer1 = self._make_layer(block, 64, layers[0]) 108 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 109 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 110 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4) 111 | self.layer5 = self._make_pred_layer(Classifier_Module, [6,12,18,24],[6,12,18,24],num_classes) 112 | dim_in = 2048 113 | feat_dim = 256 114 | self.projection_head = nn.Sequential( 115 | nn.Linear(dim_in, feat_dim), 116 | nn.BatchNorm1d(feat_dim), 117 | nn.ReLU(inplace=True), 118 | nn.Linear(feat_dim, feat_dim) 119 | ) 120 | self.prediction_head = nn.Sequential( 121 | nn.Linear(feat_dim, feat_dim), 122 | nn.BatchNorm1d(feat_dim), 123 | nn.ReLU(inplace=True), 124 | nn.Linear(feat_dim, feat_dim) 125 | ) 126 | 127 | for class_c in range(num_classes): 128 | selector = nn.Sequential( 129 | nn.Linear(feat_dim, feat_dim), 130 | nn.BatchNorm1d(feat_dim), 131 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 132 | nn.Linear(feat_dim, 1) 133 | ) 134 | self.__setattr__('contrastive_class_selector_' + str(class_c), selector) 135 | 136 | for class_c in range(num_classes): 137 | selector = nn.Sequential( 138 | nn.Linear(feat_dim, feat_dim), 139 | nn.BatchNorm1d(feat_dim), 140 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 141 | nn.Linear(feat_dim, 1) 142 | ) 143 | self.__setattr__('contrastive_class_selector_memory' + str(class_c), selector) 144 | 145 | 146 | for m in self.modules(): 147 | if isinstance(m, nn.Conv2d): 148 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 149 | m.weight.data.normal_(0, 0.01) 150 | elif isinstance(m, nn.BatchNorm2d): 151 | m.weight.data.fill_(1) 152 | m.bias.data.zero_() 153 | 154 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 155 | downsample = None 156 | if stride != 1 or self.inplanes != planes * block.expansion or dilation == 2 or dilation == 4: 157 | downsample = nn.Sequential( 158 | nn.Conv2d(self.inplanes, planes * block.expansion, 159 | kernel_size=1, stride=stride, bias=False), 160 | nn.BatchNorm2d(planes * block.expansion,affine = affine_par)) 161 | for i in downsample._modules['1'].parameters(): 162 | i.requires_grad = False 163 | layers = [] 164 | layers.append(block(self.inplanes, planes, stride,dilation=dilation, downsample=downsample)) 165 | self.inplanes = planes * block.expansion 166 | for i in range(1, blocks): 167 | layers.append(block(self.inplanes, planes, dilation=dilation)) 168 | 169 | return nn.Sequential(*layers) 170 | 171 | def _make_pred_layer(self,block, dilation_series, padding_series,num_classes): 172 | return block(dilation_series,padding_series,num_classes) 173 | 174 | 175 
| def forward_projection_head(self, features): 176 | return self.projection_head(features) 177 | 178 | def forward_prediction_head(self, features): 179 | return self.prediction_head(features) 180 | 181 | def forward(self, x, return_features=False): 182 | x = self.conv1(x) 183 | x = self.bn1(x) 184 | x = self.relu(x) 185 | x = self.maxpool(x) 186 | x = self.layer1(x) 187 | x = self.layer2(x) 188 | x = self.layer3(x) 189 | features = self.layer4(x) 190 | x = self.layer5(features) 191 | 192 | if return_features: 193 | return x, features 194 | else: 195 | return x 196 | 197 | 198 | def get_1x_lr_params(self): 199 | """ 200 | This generator returns all the parameters of the net except for 201 | the last classification layer. Note that for each batchnorm layer, 202 | requires_grad is set to False in deeplab_resnet.py, therefore this function does not return 203 | any batchnorm parameter 204 | """ 205 | b = [] 206 | 207 | b.append(self.conv1) 208 | b.append(self.bn1) 209 | b.append(self.layer1) 210 | b.append(self.layer2) 211 | b.append(self.layer3) 212 | b.append(self.layer4) 213 | b.append(self.layer5) 214 | b.append(self.projection_head) 215 | b.append(self.prediction_head) 216 | 217 | for class_c in range(self.num_classes): 218 | b.append(self.__getattr__('contrastive_class_selector_' + str(class_c))) 219 | b.append(self.__getattr__('contrastive_class_selector_memory' + str(class_c))) 220 | 221 | for i in range(len(b)): 222 | for k in b[i].parameters(): 223 | if k.requires_grad: 224 | yield k 225 | 226 | 227 | def optim_parameters(self, args): 228 | # TODO: change names 229 | return [{'params': self.get_1x_lr_params(), 'lr': args.learning_rate}] 230 | 231 | 232 | def Res_Deeplab(num_classes): 233 | model = ResNet(Bottleneck,[3, 4, 23, 3], num_classes) 234 | return model 235 | -------------------------------------------------------------------------------- /model/deeplabv2.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the implementation of DeepLabv2 without multi-scale inputs. This implementation uses ResNet-101 as backbone. 3 | 4 | Old implementation of Resnet bottleneck module where the stride is performed in the first 1x1 conv. 
5 | Used for the model pretrained in coco from https://github.com/hfslyc/AdvSemiSeg 6 | 7 | Code taken from https://github.com/WilhelmT/ClassMix 8 | Slightly modified 9 | """ 10 | 11 | import torch.nn as nn 12 | import math 13 | import torch.utils.model_zoo as model_zoo 14 | import torch 15 | import numpy as np 16 | affine_par = True 17 | 18 | 19 | def outS(i): 20 | i = int(i) 21 | i = (i+1)/2 22 | i = int(np.ceil((i+1)/2.0)) 23 | i = (i+1)/2 24 | return i 25 | 26 | def conv3x3(in_planes, out_planes, stride=1): 27 | "3x3 convolution with padding" 28 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 29 | padding=1, bias=False) 30 | 31 | 32 | 33 | class Bottleneck(nn.Module): 34 | expansion = 4 35 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None): 36 | super(Bottleneck, self).__init__() 37 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change 38 | self.bn1 = nn.BatchNorm2d(planes,affine = affine_par) 39 | for i in self.bn1.parameters(): 40 | i.requires_grad = False 41 | 42 | padding = dilation 43 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change 44 | padding=padding, bias=False, dilation = dilation) 45 | self.bn2 = nn.BatchNorm2d(planes,affine = affine_par) 46 | for i in self.bn2.parameters(): 47 | i.requires_grad = False 48 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 49 | self.bn3 = nn.BatchNorm2d(planes * 4, affine = affine_par) 50 | for i in self.bn3.parameters(): 51 | i.requires_grad = False 52 | self.relu = nn.ReLU(inplace=True) 53 | self.downsample = downsample 54 | self.stride = stride 55 | 56 | 57 | def forward(self, x): 58 | residual = x 59 | 60 | out = self.conv1(x) 61 | out = self.bn1(out) 62 | out = self.relu(out) 63 | 64 | out = self.conv2(out) 65 | out = self.bn2(out) 66 | out = self.relu(out) 67 | 68 | out = self.conv3(out) 69 | out = self.bn3(out) 70 | 71 | if self.downsample is not None: 72 | residual = self.downsample(x) 73 | 74 | out += residual 75 | out = self.relu(out) 76 | 77 | return out 78 | 79 | class Classifier_Module(nn.Module): 80 | 81 | def __init__(self, dilation_series, padding_series, num_classes): 82 | super(Classifier_Module, self).__init__() 83 | self.conv2d_list = nn.ModuleList() 84 | for dilation, padding in zip(dilation_series, padding_series): 85 | self.conv2d_list.append(nn.Conv2d(2048, num_classes, kernel_size=3, stride=1, padding=padding, dilation=dilation, bias = True)) 86 | 87 | for m in self.conv2d_list: 88 | m.weight.data.normal_(0, 0.01) 89 | 90 | def forward(self, x): 91 | out = self.conv2d_list[0](x) 92 | for i in range(len(self.conv2d_list)-1): 93 | out += self.conv2d_list[i+1](x) 94 | return out 95 | 96 | 97 | 98 | class ResNet(nn.Module): 99 | def __init__(self, block, layers, num_classes): 100 | self.inplanes = 64 101 | super(ResNet, self).__init__() 102 | self.num_classes= num_classes 103 | 104 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 105 | bias=False) 106 | self.bn1 = nn.BatchNorm2d(64, affine = affine_par) 107 | for i in self.bn1.parameters(): 108 | i.requires_grad = False 109 | self.relu = nn.ReLU(inplace=True) 110 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change 111 | self.layer1 = self._make_layer(block, 64, layers[0]) 112 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 113 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 114 | self.layer4 = self._make_layer(block, 512, 
layers[3], stride=1, dilation=4) 115 | self.layer5 = self._make_pred_layer(Classifier_Module, [6,12,18,24],[6,12,18,24],num_classes) 116 | dim_in = 2048 117 | feat_dim = 256 118 | self.projection_head = nn.Sequential( 119 | nn.Linear(dim_in, feat_dim), 120 | nn.BatchNorm1d(feat_dim), 121 | nn.ReLU(inplace=True), 122 | nn.Linear(feat_dim, feat_dim) 123 | ) 124 | self.prediction_head = nn.Sequential( 125 | nn.Linear(feat_dim, feat_dim), 126 | nn.BatchNorm1d(feat_dim), 127 | nn.ReLU(inplace=True), 128 | nn.Linear(feat_dim, feat_dim) 129 | ) 130 | 131 | for class_c in range(num_classes): 132 | selector = nn.Sequential( 133 | nn.Linear(feat_dim, feat_dim), 134 | nn.BatchNorm1d(feat_dim), 135 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 136 | nn.Linear(feat_dim, 1) 137 | ) 138 | self.__setattr__('contrastive_class_selector_' + str(class_c), selector) 139 | 140 | for class_c in range(num_classes): 141 | selector = nn.Sequential( 142 | nn.Linear(feat_dim, feat_dim), 143 | nn.BatchNorm1d(feat_dim), 144 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 145 | nn.Linear(feat_dim, 1) 146 | ) 147 | self.__setattr__('contrastive_class_selector_memory' + str(class_c), selector) 148 | 149 | 150 | 151 | for m in self.modules(): 152 | if isinstance(m, nn.Conv2d): 153 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 154 | m.weight.data.normal_(0, 0.01) 155 | elif isinstance(m, nn.BatchNorm2d): 156 | m.weight.data.fill_(1) 157 | m.bias.data.zero_() 158 | 159 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 160 | downsample = None 161 | if stride != 1 or self.inplanes != planes * block.expansion or dilation == 2 or dilation == 4: 162 | downsample = nn.Sequential( 163 | nn.Conv2d(self.inplanes, planes * block.expansion, 164 | kernel_size=1, stride=stride, bias=False), 165 | nn.BatchNorm2d(planes * block.expansion,affine = affine_par)) 166 | for i in downsample._modules['1'].parameters(): 167 | i.requires_grad = False 168 | layers = [] 169 | layers.append(block(self.inplanes, planes, stride,dilation=dilation, downsample=downsample)) 170 | self.inplanes = planes * block.expansion 171 | for i in range(1, blocks): 172 | layers.append(block(self.inplanes, planes, dilation=dilation)) 173 | 174 | return nn.Sequential(*layers) 175 | 176 | def _make_pred_layer(self,block, dilation_series, padding_series,num_classes): 177 | return block(dilation_series,padding_series,num_classes) 178 | 179 | 180 | def forward_projection_head(self, features): 181 | return self.projection_head(features) 182 | 183 | def forward_prediction_head(self, features): 184 | return self.prediction_head(features) 185 | 186 | 187 | def forward(self, x, return_features=False): 188 | x = self.conv1(x) 189 | x = self.bn1(x) 190 | x = self.relu(x) 191 | x = self.maxpool(x) 192 | x = self.layer1(x) 193 | x = self.layer2(x) 194 | x = self.layer3(x) 195 | features = self.layer4(x) 196 | x = self.layer5(features) 197 | 198 | if return_features: 199 | return x, features 200 | else: 201 | return x 202 | 203 | def get_1x_lr_params(self): 204 | """ 205 | This generator returns all the parameters of the net except for 206 | the last classification layer. 
Note that for each batchnorm layer, 207 | requires_grad is set to False in deeplab_resnet.py, therefore this function does not return 208 | any batchnorm parameter 209 | """ 210 | b = [] 211 | 212 | b.append(self.conv1) 213 | b.append(self.bn1) 214 | b.append(self.layer1) 215 | b.append(self.layer2) 216 | b.append(self.layer3) 217 | b.append(self.layer4) 218 | b.append(self.layer5) 219 | b.append(self.projection_head) 220 | b.append(self.prediction_head) 221 | 222 | for class_c in range(self.num_classes): 223 | b.append(self.__getattr__('contrastive_class_selector_' + str(class_c))) 224 | b.append(self.__getattr__('contrastive_class_selector_memory' + str(class_c))) 225 | 226 | for i in range(len(b)): 227 | for k in b[i].parameters(): 228 | if k.requires_grad: 229 | yield k 230 | 231 | 232 | def optim_parameters(self, args): 233 | # TODO: change names 234 | return [{'params': self.get_1x_lr_params(), 'lr': args.learning_rate}] 235 | 236 | 237 | def Res_Deeplab(num_classes): 238 | model = ResNet(Bottleneck,[3, 4, 23, 3], num_classes) 239 | return model 240 | -------------------------------------------------------------------------------- /utils/color_city.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import numpy as np 4 | import glob 5 | import cv2 6 | import random 7 | import scipy 8 | import argparse 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--input_dir", help="Dataset to train", default='./out_dir/Datasets/cityscapes') 12 | parser.add_argument("--output_dir", help="Dataset to train", default='./out_dir/Datasets/cityscapes_colored') 13 | args = parser.parse_args() 14 | from collections import namedtuple 15 | 16 | 17 | input_dir = args.input_dir 18 | output_dir = args.output_dir 19 | 20 | #-------------------------------------------------------------------------------- 21 | # Definitions 22 | #-------------------------------------------------------------------------------- 23 | 24 | # a label and all meta information 25 | Label = namedtuple( 'Label' , [ 26 | 27 | 'name' , # The identifier of this label, e.g. 'car', 'person', ... . 28 | # We use them to uniquely name a class 29 | 30 | 'id' , # An integer ID that is associated with this label. 31 | # The IDs are used to represent the label in ground truth images 32 | # An ID of -1 means that this label does not have an ID and thus 33 | # is ignored when creating ground truth images (e.g. license plate). 34 | # Do not modify these IDs, since exactly these IDs are expected by the 35 | # evaluation server. 36 | 37 | 'trainId' , # Feel free to modify these IDs as suitable for your method. Then create 38 | # ground truth images with train IDs, using the tools provided in the 39 | # 'preparation' folder. However, make sure to validate or submit results 40 | # to our evaluation server using the regular IDs above! 41 | # For trainIds, multiple labels might have the same ID. Then, these labels 42 | # are mapped to the same class in the ground truth images. For the inverse 43 | # mapping, we use the label that is defined first in the list below. 44 | # For example, mapping all void-type classes to the same ID in training, 45 | # might make sense for some approaches. 46 | # Max value is 255! 47 | 48 | 'category' , # The name of the category that this label belongs to 49 | 50 | 'categoryId' , # The ID of this category. Used to create ground truth images 51 | # on category level. 
52 | 53 | 'hasInstances', # Whether this label distinguishes between single instances or not 54 | 55 | 'ignoreInEval', # Whether pixels having this class as ground truth label are ignored 56 | # during evaluations or not 57 | 58 | 'color' , # The color of this label 59 | ] ) 60 | 61 | 62 | #-------------------------------------------------------------------------------- 63 | # A list of all labels 64 | #-------------------------------------------------------------------------------- 65 | 66 | # Please adapt the train IDs as appropriate for your approach. 67 | # Note that you might want to ignore labels with ID 255 during training. 68 | # Further note that the current train IDs are only a suggestion. You can use whatever you like. 69 | # Make sure to provide your results using the original IDs and not the training IDs. 70 | # Note that many IDs are ignored in evaluation and thus you never need to predict these! 71 | 72 | labels = [ 73 | # name id trainId category catId hasInstances ignoreInEval color 74 | Label( 'unlabeled' , 0 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), 75 | Label( 'ego vehicle' , 1 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), 76 | Label( 'rectification border' , 2 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), 77 | Label( 'out of roi' , 3 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), 78 | Label( 'static' , 4 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), 79 | Label( 'dynamic' , 5 , 255 , 'void' , 0 , False , True , (111, 74, 0) ), 80 | Label( 'ground' , 6 , 255 , 'void' , 0 , False , True , ( 81, 0, 81) ), 81 | Label( 'road' , 7 , 0 , 'flat' , 1 , False , False , (128, 64,128) ), 82 | Label( 'sidewalk' , 8 , 1 , 'flat' , 1 , False , False , (244, 35,232) ), 83 | Label( 'parking' , 9 , 255 , 'flat' , 1 , False , True , (250,170,160) ), 84 | Label( 'rail track' , 10 , 255 , 'flat' , 1 , False , True , (230,150,140) ), 85 | Label( 'building' , 11 , 2 , 'construction' , 2 , False , False , ( 70, 70, 70) ), 86 | Label( 'wall' , 12 , 3 , 'construction' , 2 , False , False , (102,102,156) ), 87 | Label( 'fence' , 13 , 4 , 'construction' , 2 , False , False , (190,153,153) ), 88 | Label( 'guard rail' , 14 , 255 , 'construction' , 2 , False , True , (180,165,180) ), 89 | Label( 'bridge' , 15 , 255 , 'construction' , 2 , False , True , (150,100,100) ), 90 | Label( 'tunnel' , 16 , 255 , 'construction' , 2 , False , True , (150,120, 90) ), 91 | Label( 'pole' , 17 , 5 , 'object' , 3 , False , False , (153,153,153) ), 92 | Label( 'polegroup' , 18 , 255 , 'object' , 3 , False , True , (153,153,153) ), 93 | Label( 'traffic light' , 19 , 6 , 'object' , 3 , False , False , (250,170, 30) ), 94 | Label( 'traffic sign' , 20 , 7 , 'object' , 3 , False , False , (220,220, 0) ), 95 | Label( 'vegetation' , 21 , 8 , 'nature' , 4 , False , False , (107,142, 35) ), 96 | Label( 'terrain' , 22 , 9 , 'nature' , 4 , False , False , (152,251,152) ), 97 | Label( 'sky' , 23 , 10 , 'sky' , 5 , False , False , ( 70,130,180) ), 98 | Label( 'person' , 24 , 11 , 'human' , 6 , True , False , (220, 20, 60) ), 99 | Label( 'rider' , 25 , 12 , 'human' , 6 , True , False , (255, 0, 0) ), 100 | Label( 'car' , 26 , 13 , 'vehicle' , 7 , True , False , ( 0, 0,142) ), 101 | Label( 'truck' , 27 , 14 , 'vehicle' , 7 , True , False , ( 0, 0, 70) ), 102 | Label( 'bus' , 28 , 15 , 'vehicle' , 7 , True , False , ( 0, 60,100) ), 103 | Label( 'caravan' , 29 , 255 , 'vehicle' , 7 , True , True , ( 0, 0, 90) ), 104 | Label( 'trailer' , 30 , 255 , 'vehicle' , 7 , True , True , ( 0, 0,110) ), 105 | Label( 'train' 
, 31 , 16 , 'vehicle' , 7 , True , False , ( 0, 80,100) ), 106 | Label( 'motorcycle' , 32 , 17 , 'vehicle' , 7 , True , False , ( 0, 0,230) ), 107 | Label( 'bicycle' , 33 , 18 , 'vehicle' , 7 , True , False , (119, 11, 32) ), 108 | Label( 'license plate' , -1 , -1 , 'vehicle' , 7 , False , True , ( 0, 0,142) ), 109 | ] 110 | 111 | 112 | #-------------------------------------------------------------------------------- 113 | # Create dictionaries for a fast lookup 114 | #-------------------------------------------------------------------------------- 115 | 116 | # Please refer to the main method below for example usages! 117 | 118 | # name to label object 119 | name2label = { label.name : label for label in labels } 120 | # id to label object 121 | id2label = { label.id : label for label in labels } 122 | # trainId to label object 123 | trainId2label = { label.trainId : label for label in reversed(labels) } 124 | 125 | 126 | 127 | 128 | def fromIdTraintoColor(imgin, imgout): 129 | for id in id2label: 130 | 131 | color = (id2label[id].color[2], id2label[id].color[1], id2label[id].color[0]) 132 | imgout[imgin==id2label[id].trainId] = color 133 | 134 | imgout[imgin > 18] = 0 135 | return imgout 136 | 137 | 138 | if not os.path.exists(output_dir): 139 | os.makedirs(output_dir) 140 | 141 | 142 | outputs = glob.glob(input_dir + '/*') 143 | for output in outputs: 144 | name = output.split('/')[-1] 145 | output_name = output_dir + '/' + name 146 | print(output_name) 147 | img = cv2.imread(output, 0) 148 | imgout = cv2.imread(output, 1) 149 | imgout = fromIdTraintoColor(img, imgout) 150 | cv2.imwrite(output_name, imgout) -------------------------------------------------------------------------------- /utils/transformsgpu.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code taken from https://github.com/WilhelmT/ClassMix 3 | Slightly modified 4 | ''' 5 | 6 | import kornia 7 | import torch 8 | import random 9 | import torch.nn as nn 10 | 11 | 12 | def normalize_rgb(data, dataset): 13 | """ 14 | 15 | Args: 16 | data: data to normalize BxCxWxH 17 | dataset: name of the dataset to normalize 18 | 19 | Returns: 20 | normalized data as (x-mean)/255 21 | 22 | """ 23 | if dataset == 'pascal_voc': 24 | mean = (122.6789143, 116.66876762, 104.00698793) # rgb 25 | elif dataset == 'cityscapes': 26 | mean = (73.15835921, 82.90891754, 72.39239876) # rgb 27 | else: 28 | mean = (127.5, 127.5, 127.5 ) 29 | 30 | mean = torch.Tensor(mean).unsqueeze(0).unsqueeze(2).unsqueeze(3).cuda() 31 | data_norm = ((data-mean)/255.0) 32 | return data_norm 33 | 34 | 35 | def normalize_bgr(data, dataset): 36 | """ 37 | 38 | Args: 39 | data: data to normalize BxCxWxH 40 | dataset: name of the dataset to normalize 41 | 42 | Returns: 43 | normalized data as (x-mean)/255 44 | 45 | """ 46 | if dataset == 'pascal_voc': 47 | mean = (104.00698793, 116.66876762, 122.6789143) # bgr 48 | elif dataset == 'cityscapes': 49 | mean = (72.39239876, 82.90891754, 73.15835921) # bgr 50 | else: 51 | mean = (127.5, 127.5, 127.5 ) 52 | 53 | mean = torch.Tensor(mean).unsqueeze(0).unsqueeze(2).unsqueeze(3).cuda() 54 | data_norm = ((data-mean)/255.0) 55 | return data_norm 56 | 57 | 58 | 59 | def grayscale(grayscale, data = None, target = None, probs = None): 60 | """ 61 | 62 | Args: 63 | grayscale: boolean whether to apply grayscale augmentation 64 | data: input data to augment BxCxWxH 65 | target: labels to augment BxWxH 66 | probs: probability masks to augment BxCxWxH 67 | 68 | Returns: 69 | data is converted 
from rgb to grayscale if [grayscale] is True 70 | target and probs are also returned with no modifications applied 71 | 72 | """ 73 | if not (data is None): 74 | if grayscale and data.shape[1]==3: 75 | seq = nn.Sequential(kornia.augmentation.RandomGrayscale(p=1.) ) 76 | data = seq(data) 77 | return data, target, probs 78 | 79 | def colorJitter(colorJitter, data = None, target = None, s=0.1, probs = None): 80 | """ 81 | 82 | Args: 83 | colorJitter: boolean whether to apply colorJitter augmentation 84 | data: input data to augment BxCxWxH 85 | target: labels to augment BxWxH 86 | probs: probability masks to augment BxCxWxH 87 | s: brightness and contrast strength of the color jitter 88 | 89 | Returns: 90 | colorJitter is applied to data if [colorJitter] is True 91 | target and probs are also returned with no modifications applied 92 | 93 | 94 | """ 95 | if not (data is None): 96 | if colorJitter and data.shape[1]==3: 97 | seq = nn.Sequential(kornia.augmentation.ColorJitter(brightness=s,contrast=s,saturation=s/2.,hue=s/3.)) 98 | data = seq(data/255.)*255. # assumes [0,1] 99 | return data, target, probs 100 | 101 | def gaussian_blur(blur, data = None, target = None, min_sigma=0.2, max_sigma=3, probs = None): 102 | """ 103 | 104 | Args: 105 | blur: boolean whether to apply blur 106 | data: input data to augment BxCxWxH 107 | target: labels to augment BxWxH 108 | probs: probability masks to augment BxCxWxH 109 | min_sigma: minimum sigma value for the gaussian blur 110 | max_sigma: maximum sigma value for the gaussian blur 111 | 112 | Returns: 113 | gaussian blur is applied to data if [blur] is True 114 | target and probs are also returned with no modifications applied 115 | 116 | """ 117 | if not (data is None): 118 | if blur and data.shape[1]==3: 119 | seq = nn.Sequential(kornia.filters.GaussianBlur2d(kernel_size=(23, 23), sigma=(min_sigma, max_sigma))) 120 | data = seq(data) 121 | return data, target, probs 122 | 123 | def flip(flip, data = None, target = None, probs = None): 124 | """ 125 | 126 | Args: 127 | flip: boolean whether to apply flip augmentation 128 | data: input data to augment BxCxWxH 129 | target: labels to augment BxWxH 130 | probs: probability masks to augment BxCxWxH 131 | 132 | Returns: 133 | data, target and probs are flipped if the boolean flip is True 134 | 135 | """ 136 | if flip: 137 | if not (data is None): data = torch.flip(data,(3,)) 138 | if not (target is None): 139 | target = torch.flip(target,(2,)) 140 | if not (probs is None): 141 | probs = torch.flip(probs,(2,)) 142 | return data, target, probs 143 | 144 | def solarize(solarize, data = None, target = None, probs = None): 145 | """ 146 | 147 | Args: 148 | solarize: boolean whether to apply solarize augmentation 149 | data: input data to augment BxCxWxH 150 | target: labels to augment BxWxH 151 | probs: probability masks to augment BxCxWxH 152 | 153 | Returns: 154 | data, target, probs, where 155 | data is solarized if [solarize] is True 156 | 157 | """ 158 | if not (data is None): 159 | if solarize and data.shape[1]==3: 160 | seq = nn.Sequential(kornia.augmentation.RandomSolarize((0, 1))) 161 | data = seq(data.cpu()/255.).cuda()*255. 162 | return data, target, probs 163 | 164 | 165 | 166 | 167 | def mix(mask, data = None, target = None, probs = None): 168 | """ 169 | Applies classMix augmentation: 170 | https://openaccess.thecvf.com/content/WACV2021/papers/Olsson_ClassMix_Segmentation-Based_Data_Augmentation_for_Semi-Supervised_Learning_WACV_2021_paper.pdf 171 | Args: 172 | mask: masks for applying ClassMix. 
A list of B elements of CxWxH tensors 173 | data: input data to augment BxCxWxH 174 | target: labels to augment BxWxH 175 | probs: probability masks to augment BxCxWxH 176 | 177 | Returns: 178 | data, target and probs augmented with classMix 179 | 180 | """ 181 | if not (data is None): 182 | if mask.shape[0] == data.shape[0]: 183 | data = torch.cat([((1 - mask[(i + 1) % data.shape[0]]) * data[i] + mask[(i + 1) % data.shape[0]] * data[(i + 1) % data.shape[0]]).unsqueeze(0) for i in range(data.shape[0])]) 184 | 185 | if not (target is None): 186 | target = torch.cat([((1 - mask[(i + 1) % data.shape[0]]) * target[i] + mask[(i + 1) % data.shape[0]] * target[(i + 1) % target.shape[0]]).unsqueeze(0) for i in range(target.shape[0])]) 187 | 188 | if not (probs is None): 189 | probs = torch.cat([((1 - mask[(i + 1) % data.shape[0]]) * probs[i] + mask[(i + 1) % data.shape[0]] * probs[(i + 1) % probs.shape[0]]).unsqueeze(0) for i in range(probs.shape[0])]) 190 | 191 | return data, target, probs 192 | 193 | 194 | def random_scale_crop(scale, data = None, target = None, ignore_label=255, probs = None): 195 | """ 196 | 197 | Args: 198 | scale: scale ratio. Float 199 | data: input data to augment BxCxWxH 200 | target: labels to augment BxWxH 201 | probs: probability masks to augment BxCxWxH 202 | ignore_label: integeer value that defines the ignore class in the datasets for the labels 203 | 204 | Returns: 205 | data, target and prob, after applied a scaling operation. output resolution is preserve as the same as the input resolution WxH 206 | """ 207 | if scale != 1: 208 | init_size_w = data.shape[2] 209 | init_size_h = data.shape[3] 210 | 211 | # scale data, labels and probs 212 | data = nn.functional.interpolate(data, scale_factor=scale, mode='bilinear', align_corners=True, recompute_scale_factor=True) 213 | if target is not None: 214 | target = nn.functional.interpolate(target.unsqueeze(1).float(), scale_factor=scale, mode='nearest', recompute_scale_factor=True).long().squeeze(1) 215 | if probs is not None: 216 | probs = nn.functional.interpolate(probs.unsqueeze(1), scale_factor=scale, mode='bilinear', align_corners=True, recompute_scale_factor=True).squeeze(1) 217 | 218 | final_size_w = data.shape[2] 219 | final_size_h = data.shape[3] 220 | diff_h = init_size_h - final_size_h 221 | diff_w = init_size_w - final_size_w 222 | if scale < 1: # add padding if needed 223 | if diff_h % 2 == 1: 224 | pad = nn.ConstantPad2d((diff_w//2, diff_w//2+1, diff_h//2+1, diff_h//2), 0) 225 | else: 226 | pad = nn.ConstantPad2d((diff_w//2, diff_w//2, diff_h//2, diff_h//2), 0) 227 | 228 | data = pad(data) 229 | if probs is not None: 230 | probs = pad(probs) 231 | 232 | # padding with ignore label to add to labels 233 | if diff_h % 2 == 1: 234 | pad = nn.ConstantPad2d((diff_w//2, diff_w//2+1, diff_h//2+1, diff_h//2), ignore_label) 235 | else: 236 | pad = nn.ConstantPad2d((diff_w//2, diff_w//2, diff_h//2, diff_h//2), ignore_label) 237 | 238 | if target is not None: 239 | target = pad(target) 240 | 241 | else: # crop if needed 242 | w = random.randint(0, data.shape[2] - init_size_w) 243 | h = random.randint(0, data.shape[3] - init_size_h) 244 | data = data [:,:,h:h+init_size_h,w:w + init_size_w] 245 | if probs is not None: 246 | probs = probs [:,h:h+init_size_h,w:w + init_size_w] 247 | if target is not None: 248 | target = target [:,h:h+init_size_h,w:w + init_size_w] 249 | 250 | return data, target, probs 251 | 252 | 253 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /model/deeplabv3.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the implementation of DeepLabv3+ without multi-scale inputs. This implementation uses ResNet-101 as backbone. 3 | 4 | This deeplab is used with imagenet pretraining to match the current pytorch implementation that provides these weights. 5 | This implementation follows the new implementation of Resnet bottleneck module where the stride is performed in the 3x3 conv. 6 | 7 | Code taken from https://github.com/WilhelmT/ClassMix 8 | Slightly modified 9 | """ 10 | import torch.nn as nn 11 | import math 12 | import torch.utils.model_zoo as model_zoo 13 | import torch 14 | import numpy as np 15 | affine_par = True 16 | import torch.nn.functional as F 17 | BatchNorm = nn.BatchNorm2d 18 | 19 | 20 | class Decoder(nn.Module): 21 | def __init__(self, num_classes): 22 | super(Decoder, self).__init__() 23 | low_level_inplanes = 256 24 | 25 | self.conv1 = nn.Conv2d(low_level_inplanes, 48, 1, bias=False) 26 | self.bn1 = BatchNorm(48) 27 | self.relu = nn.ReLU() 28 | self.pre_last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False), 29 | BatchNorm(256), 30 | nn.ReLU(), 31 | nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False), 32 | BatchNorm(256), 33 | nn.ReLU()) 34 | 35 | self.last_dropout = nn.Dropout(0.1) 36 | self.last_conv = nn.Conv2d(256, num_classes, kernel_size=1, stride=1) 37 | 38 | self._init_weight() 39 | 40 | 41 | def forward(self, x, low_level_feat, return_features=False): 42 | low_level_feat = self.conv1(low_level_feat) 43 | low_level_feat = self.bn1(low_level_feat) 44 | low_level_feat = self.relu(low_level_feat) 45 | 46 | x = F.interpolate(x, size=low_level_feat.size()[2:], mode='bilinear', align_corners=True) 47 | x = torch.cat((x, low_level_feat), dim=1) 48 | x_f = self.pre_last_conv(x) 49 | x = self.last_dropout(x_f) 50 | x = self.last_conv(x) 51 | if return_features: 52 | return x, x_f 53 | return x 54 | 55 | def _init_weight(self): 56 | for m in self.modules(): 57 | if isinstance(m, nn.Conv2d): 58 | torch.nn.init.kaiming_normal_(m.weight) 59 | elif isinstance(m, nn.BatchNorm2d): 60 | m.weight.data.fill_(1) 61 | m.bias.data.zero_() 62 | 63 | class _ASPPModule(nn.Module): 64 | def 
__init__(self, inplanes, planes, kernel_size, padding, dilation): 65 | super(_ASPPModule, self).__init__() 66 | self.atrous_conv = nn.Conv2d(inplanes, planes, kernel_size=kernel_size, 67 | stride=1, padding=padding, dilation=dilation, bias=False) 68 | self.bn = BatchNorm(planes) 69 | self.relu = nn.ReLU() 70 | 71 | self._init_weight() 72 | 73 | def forward(self, x): 74 | x = self.atrous_conv(x) 75 | x = self.bn(x) 76 | 77 | return self.relu(x) 78 | 79 | def _init_weight(self): 80 | for m in self.modules(): 81 | if isinstance(m, nn.Conv2d): 82 | torch.nn.init.kaiming_normal_(m.weight) 83 | elif isinstance(m, nn.BatchNorm2d): 84 | m.weight.data.fill_(1) 85 | m.bias.data.zero_() 86 | 87 | class ASPP(nn.Module): 88 | def __init__(self): 89 | super(ASPP, self).__init__() 90 | inplanes = 2048 91 | dilations = [1, 6, 12, 18] 92 | 93 | 94 | self.aspp1 = _ASPPModule(inplanes, 256, 1, padding=0, dilation=dilations[0]) 95 | self.aspp2 = _ASPPModule(inplanes, 256, 3, padding=dilations[1], dilation=dilations[1]) 96 | self.aspp3 = _ASPPModule(inplanes, 256, 3, padding=dilations[2], dilation=dilations[2]) 97 | self.aspp4 = _ASPPModule(inplanes, 256, 3, padding=dilations[3], dilation=dilations[3]) 98 | 99 | self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), 100 | nn.Conv2d(inplanes, 256, 1, stride=1, bias=False), 101 | BatchNorm(256), 102 | nn.ReLU()) 103 | self.conv1 = nn.Conv2d(1280, 256, 1, bias=False) 104 | self.bn1 = BatchNorm(256) 105 | self.relu = nn.ReLU() 106 | self.dropout = nn.Dropout(0.5) 107 | self._init_weight() 108 | 109 | def forward(self, x): 110 | x1 = self.aspp1(x) 111 | x2 = self.aspp2(x) 112 | x3 = self.aspp3(x) 113 | x4 = self.aspp4(x) 114 | x5 = self.global_avg_pool(x) 115 | x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True) 116 | x = torch.cat((x1, x2, x3, x4, x5), dim=1) 117 | 118 | x = self.conv1(x) 119 | x = self.bn1(x) 120 | x = self.relu(x) 121 | 122 | return self.dropout(x) 123 | 124 | def _init_weight(self): 125 | for m in self.modules(): 126 | if isinstance(m, nn.Conv2d): 127 | torch.nn.init.kaiming_normal_(m.weight) 128 | elif isinstance(m, nn.BatchNorm2d): 129 | m.weight.data.fill_(1) 130 | m.bias.data.zero_() 131 | 132 | 133 | def outS(i): 134 | i = int(i) 135 | i = (i+1)/2 136 | i = int(np.ceil((i+1)/2.0)) 137 | i = (i+1)/2 138 | return i 139 | 140 | def conv3x3(in_planes, out_planes, stride=1): 141 | "3x3 convolution with padding" 142 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 143 | padding=1, bias=False) 144 | 145 | 146 | 147 | 148 | 149 | class Bottleneck(nn.Module): 150 | expansion = 4 151 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None): 152 | super(Bottleneck, self).__init__() 153 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) # change 154 | self.bn1 = nn.BatchNorm2d(planes,affine = affine_par) 155 | for i in self.bn1.parameters(): 156 | i.requires_grad = False 157 | 158 | padding = dilation 159 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, # change 160 | padding=padding, bias=False, dilation = dilation) 161 | self.bn2 = nn.BatchNorm2d(planes,affine = affine_par) 162 | for i in self.bn2.parameters(): 163 | i.requires_grad = False 164 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 165 | self.bn3 = nn.BatchNorm2d(planes * 4, affine = affine_par) 166 | for i in self.bn3.parameters(): 167 | i.requires_grad = False 168 | self.relu = nn.ReLU(inplace=True) 169 | self.downsample = 
downsample 170 | self.stride = stride 171 | 172 | def forward(self, x): 173 | residual = x 174 | 175 | out = self.conv1(x) 176 | out = self.bn1(out) 177 | out = self.relu(out) 178 | 179 | out = self.conv2(out) 180 | out = self.bn2(out) 181 | out = self.relu(out) 182 | 183 | out = self.conv3(out) 184 | out = self.bn3(out) 185 | 186 | if self.downsample is not None: 187 | residual = self.downsample(x) 188 | 189 | out += residual 190 | out = self.relu(out) 191 | 192 | return out 193 | 194 | 195 | 196 | 197 | 198 | class ResNet(nn.Module): 199 | def __init__(self, block, layers, num_classes, output_stride=16): 200 | self.inplanes = 64 201 | super(ResNet, self).__init__() 202 | self.num_classes= num_classes 203 | if output_stride == 16: 204 | strides = [2, 2, 1] 205 | dilations = [1, 1, 2] 206 | elif output_stride == 8: 207 | strides = [2, 1, 1] 208 | dilations = [ 1, 2, 4] 209 | 210 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 211 | bias=False) 212 | self.bn1 = nn.BatchNorm2d(64, affine = affine_par) 213 | for i in self.bn1.parameters(): 214 | i.requires_grad = False 215 | self.relu = nn.ReLU(inplace=True) 216 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) # change 217 | self.layer1 = self._make_layer(block, 64, layers[0]) 218 | self.layer2 = self._make_layer(block, 128, layers[1], stride=strides[0], dilation=dilations[0]) 219 | self.layer3 = self._make_layer(block, 256, layers[2], stride=strides[1], dilation=dilations[1]) 220 | self.layer4 = self._make_layer(block, 512, layers[3], stride=strides[2], dilation=dilations[2]) 221 | 222 | self.aspp = ASPP() 223 | self.decoder = Decoder(num_classes) 224 | 225 | 226 | dim_in = 256 227 | feat_dim = 256 228 | self.projection_head = nn.Sequential( 229 | nn.Linear(dim_in, feat_dim), 230 | nn.BatchNorm1d(feat_dim), 231 | nn.ReLU(inplace=True), 232 | nn.Linear(feat_dim, feat_dim) 233 | ) 234 | self.prediction_head = nn.Sequential( 235 | nn.Linear(feat_dim, feat_dim), 236 | nn.BatchNorm1d(feat_dim), 237 | nn.ReLU(inplace=True), 238 | nn.Linear(feat_dim, feat_dim) 239 | ) 240 | 241 | for class_c in range(num_classes): 242 | selector = nn.Sequential( 243 | nn.Linear(feat_dim, feat_dim), 244 | nn.BatchNorm1d(feat_dim), 245 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 246 | nn.Linear(feat_dim, 1) 247 | ) 248 | self.__setattr__('contrastive_class_selector_' + str(class_c), selector) 249 | 250 | for class_c in range(num_classes): 251 | selector = nn.Sequential( 252 | nn.Linear(feat_dim, feat_dim), 253 | nn.BatchNorm1d(feat_dim), 254 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 255 | nn.Linear(feat_dim, 1) 256 | ) 257 | self.__setattr__('contrastive_class_selector_memory' + str(class_c), selector) 258 | 259 | for m in self.modules(): 260 | if isinstance(m, nn.Conv2d): 261 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 262 | m.weight.data.normal_(0, 0.01) 263 | elif isinstance(m, nn.BatchNorm2d): 264 | m.weight.data.fill_(1) 265 | m.bias.data.zero_() 266 | 267 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 268 | downsample = None 269 | if stride != 1 or self.inplanes != planes * block.expansion or dilation == 2 or dilation == 4: 270 | downsample = nn.Sequential( 271 | nn.Conv2d(self.inplanes, planes * block.expansion, 272 | kernel_size=1, stride=stride, bias=False), 273 | nn.BatchNorm2d(planes * block.expansion,affine = affine_par)) 274 | for i in downsample._modules['1'].parameters(): 275 | i.requires_grad = False 276 | layers = [] 277 | layers.append(block(self.inplanes, 
planes, stride,dilation=dilation, downsample=downsample)) 278 | self.inplanes = planes * block.expansion 279 | for i in range(1, blocks): 280 | layers.append(block(self.inplanes, planes, dilation=dilation)) 281 | 282 | return nn.Sequential(*layers) 283 | 284 | def _make_pred_layer(self,block, dilation_series, padding_series,num_classes): 285 | return block(dilation_series,padding_series,num_classes) 286 | 287 | 288 | def forward_projection_head(self, features): 289 | return self.projection_head(features) 290 | 291 | def forward_prediction_head(self, features): 292 | return self.prediction_head(features) 293 | 294 | def forward(self, x, return_features=False): 295 | x = self.conv1(x) 296 | x = self.bn1(x) 297 | x = self.relu(x) 298 | x = self.maxpool(x) 299 | low_level_feat = self.layer1(x) 300 | x = self.layer2(low_level_feat) 301 | x = self.layer3(x) 302 | x = self.layer4(x) 303 | x = self.aspp(x) 304 | if return_features: 305 | x, features = self.decoder(x, low_level_feat, True) 306 | return x, features 307 | else: 308 | x = self.decoder(x, low_level_feat, False) 309 | return x 310 | 311 | 312 | def get_1x_lr_params(self): 313 | """ 314 | This generator returns all the parameters of the net except for 315 | the last classification layer. Note that for each batchnorm layer, 316 | requires_grad is set to False in deeplab_resnet.py, therefore this function does not return 317 | any batchnorm parameter 318 | """ 319 | b = [] 320 | 321 | b.append(self.conv1) 322 | b.append(self.bn1) 323 | b.append(self.layer1) 324 | b.append(self.layer2) 325 | b.append(self.layer3) 326 | b.append(self.layer4) 327 | b.append(self.aspp) 328 | b.append(self.decoder) 329 | b.append(self.projection_head) 330 | b.append(self.prediction_head) 331 | 332 | for class_c in range(self.num_classes): 333 | b.append(self.__getattr__('contrastive_class_selector_' + str(class_c))) 334 | b.append(self.__getattr__('contrastive_class_selector_memory' + str(class_c))) 335 | 336 | for i in range(len(b)): 337 | for k in b[i].parameters(): 338 | if k.requires_grad: 339 | yield k 340 | 341 | def optim_parameters(self, args): 342 | return [{'params': self.get_1x_lr_params(), 'lr': args.learning_rate}] 343 | 344 | def Res_Deeplab(num_classes): 345 | model = ResNet(Bottleneck,[3, 4, 23, 3], num_classes) 346 | return model 347 | 348 | def Res_Deeplab50(num_classes, os=16): 349 | model = ResNet(Bottleneck,[3, 4, 6, 3], num_classes, output_stride=os) 350 | return model 351 | 352 | 353 | -------------------------------------------------------------------------------- /utils/sync_batchnorm/batchnorm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : batchnorm.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 
10 | 11 | import collections 12 | import contextlib 13 | 14 | import torch 15 | import torch.nn.functional as F 16 | 17 | from torch.nn.modules.batchnorm import _BatchNorm 18 | 19 | try: 20 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast 21 | except ImportError: 22 | ReduceAddCoalesced = Broadcast = None 23 | 24 | try: 25 | from jactorch.parallel.comm import SyncMaster 26 | from jactorch.parallel.data_parallel import JacDataParallel as DataParallelWithCallback 27 | except ImportError: 28 | from .comm import SyncMaster 29 | from .replicate import DataParallelWithCallback 30 | 31 | __all__ = [ 32 | 'SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', 'SynchronizedBatchNorm3d', 33 | 'patch_sync_batchnorm', 'convert_model' 34 | ] 35 | 36 | 37 | def _sum_ft(tensor): 38 | """sum over the first and last dimention""" 39 | return tensor.sum(dim=0).sum(dim=-1) 40 | 41 | 42 | def _unsqueeze_ft(tensor): 43 | """add new dimensions at the front and the tail""" 44 | return tensor.unsqueeze(0).unsqueeze(-1) 45 | 46 | 47 | _ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size']) 48 | _MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std']) 49 | 50 | 51 | class _SynchronizedBatchNorm(_BatchNorm): 52 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): 53 | assert ReduceAddCoalesced is not None, 'Can not use Synchronized Batch Normalization without CUDA support.' 54 | 55 | super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine) 56 | 57 | self._sync_master = SyncMaster(self._data_parallel_master) 58 | 59 | self._is_parallel = False 60 | self._parallel_id = None 61 | self._slave_pipe = None 62 | 63 | def forward(self, input): 64 | # If it is not parallel computation or is in evaluation mode, use PyTorch's implementation. 65 | if not (self._is_parallel and self.training): 66 | return F.batch_norm( 67 | input, self.running_mean, self.running_var, self.weight, self.bias, 68 | self.training, self.momentum, self.eps) 69 | 70 | # Resize the input to (B, C, -1). 71 | input_shape = input.size() 72 | input = input.view(input.size(0), self.num_features, -1) 73 | 74 | # Compute the sum and square-sum. 75 | sum_size = input.size(0) * input.size(2) 76 | input_sum = _sum_ft(input) 77 | input_ssum = _sum_ft(input ** 2) 78 | 79 | # Reduce-and-broadcast the statistics. 80 | if self._parallel_id == 0: 81 | mean, inv_std = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size)) 82 | else: 83 | mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size)) 84 | 85 | # Compute the output. 86 | if self.affine: 87 | # MJY:: Fuse the multiplication for speed. 88 | output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias) 89 | else: 90 | output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std) 91 | 92 | # Reshape it. 93 | return output.view(input_shape) 94 | 95 | def __data_parallel_replicate__(self, ctx, copy_id): 96 | self._is_parallel = True 97 | self._parallel_id = copy_id 98 | 99 | # parallel_id == 0 means master device. 
100 | if self._parallel_id == 0: 101 | ctx.sync_master = self._sync_master 102 | else: 103 | self._slave_pipe = ctx.sync_master.register_slave(copy_id) 104 | 105 | def _data_parallel_master(self, intermediates): 106 | """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" 107 | 108 | # Always using same "device order" makes the ReduceAdd operation faster. 109 | # Thanks to:: Tete Xiao (http://tetexiao.com/) 110 | intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) 111 | 112 | to_reduce = [i[1][:2] for i in intermediates] 113 | to_reduce = [j for i in to_reduce for j in i] # flatten 114 | target_gpus = [i[1].sum.get_device() for i in intermediates] 115 | 116 | sum_size = sum([i[1].sum_size for i in intermediates]) 117 | sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) 118 | mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) 119 | 120 | broadcasted = Broadcast.apply(target_gpus, mean, inv_std) 121 | 122 | outputs = [] 123 | for i, rec in enumerate(intermediates): 124 | outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2]))) 125 | 126 | return outputs 127 | 128 | def _compute_mean_std(self, sum_, ssum, size): 129 | """Compute the mean and standard-deviation with sum and square-sum. This method 130 | also maintains the moving average on the master device.""" 131 | assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' 132 | mean = sum_ / size 133 | sumvar = ssum - sum_ * mean 134 | unbias_var = sumvar / (size - 1) 135 | bias_var = sumvar / size 136 | 137 | if hasattr(torch, 'no_grad'): 138 | with torch.no_grad(): 139 | self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data 140 | self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data 141 | else: 142 | self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data 143 | self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data 144 | 145 | return mean, bias_var.clamp(self.eps) ** -0.5 146 | 147 | 148 | class SynchronizedBatchNorm1d(_SynchronizedBatchNorm): 149 | r"""Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a 150 | mini-batch. 151 | 152 | .. math:: 153 | 154 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta 155 | 156 | This module differs from the built-in PyTorch BatchNorm1d as the mean and 157 | standard-deviation are reduced across all devices during training. 158 | 159 | For example, when one uses `nn.DataParallel` to wrap the network during 160 | training, PyTorch's implementation normalize the tensor on each device using 161 | the statistics only on that device, which accelerated the computation and 162 | is also easy to implement, but the statistics might be inaccurate. 163 | Instead, in this synchronized version, the statistics will be computed 164 | over all training samples distributed on multiple devices. 165 | 166 | Note that, for one-GPU or CPU-only case, this module behaves exactly same 167 | as the built-in PyTorch implementation. 168 | 169 | The mean and standard-deviation are calculated per-dimension over 170 | the mini-batches and gamma and beta are learnable parameter vectors 171 | of size C (where C is the input size). 172 | 173 | During training, this layer keeps a running estimate of its computed mean 174 | and variance. The running sum is kept with a default momentum of 0.1. 
175 | 176 | During evaluation, this running mean/variance is used for normalization. 177 | 178 | Because the BatchNorm is done over the `C` dimension, computing statistics 179 | on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm 180 | 181 | Args: 182 | num_features: num_features from an expected input of size 183 | `batch_size x num_features [x width]` 184 | eps: a value added to the denominator for numerical stability. 185 | Default: 1e-5 186 | momentum: the value used for the running_mean and running_var 187 | computation. Default: 0.1 188 | affine: a boolean value that when set to ``True``, gives the layer learnable 189 | affine parameters. Default: ``True`` 190 | 191 | Shape:: 192 | - Input: :math:`(N, C)` or :math:`(N, C, L)` 193 | - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) 194 | 195 | Examples: 196 | >>> # With Learnable Parameters 197 | >>> m = SynchronizedBatchNorm1d(100) 198 | >>> # Without Learnable Parameters 199 | >>> m = SynchronizedBatchNorm1d(100, affine=False) 200 | >>> input = torch.autograd.Variable(torch.randn(20, 100)) 201 | >>> output = m(input) 202 | """ 203 | 204 | def _check_input_dim(self, input): 205 | if input.dim() != 2 and input.dim() != 3: 206 | raise ValueError('expected 2D or 3D input (got {}D input)' 207 | .format(input.dim())) 208 | super(SynchronizedBatchNorm1d, self)._check_input_dim(input) 209 | 210 | 211 | class SynchronizedBatchNorm2d(_SynchronizedBatchNorm): 212 | r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch 213 | of 3d inputs 214 | 215 | .. math:: 216 | 217 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta 218 | 219 | This module differs from the built-in PyTorch BatchNorm2d as the mean and 220 | standard-deviation are reduced across all devices during training. 221 | 222 | For example, when one uses `nn.DataParallel` to wrap the network during 223 | training, PyTorch's implementation normalize the tensor on each device using 224 | the statistics only on that device, which accelerated the computation and 225 | is also easy to implement, but the statistics might be inaccurate. 226 | Instead, in this synchronized version, the statistics will be computed 227 | over all training samples distributed on multiple devices. 228 | 229 | Note that, for one-GPU or CPU-only case, this module behaves exactly same 230 | as the built-in PyTorch implementation. 231 | 232 | The mean and standard-deviation are calculated per-dimension over 233 | the mini-batches and gamma and beta are learnable parameter vectors 234 | of size C (where C is the input size). 235 | 236 | During training, this layer keeps a running estimate of its computed mean 237 | and variance. The running sum is kept with a default momentum of 0.1. 238 | 239 | During evaluation, this running mean/variance is used for normalization. 240 | 241 | Because the BatchNorm is done over the `C` dimension, computing statistics 242 | on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm 243 | 244 | Args: 245 | num_features: num_features from an expected input of 246 | size batch_size x num_features x height x width 247 | eps: a value added to the denominator for numerical stability. 248 | Default: 1e-5 249 | momentum: the value used for the running_mean and running_var 250 | computation. Default: 0.1 251 | affine: a boolean value that when set to ``True``, gives the layer learnable 252 | affine parameters. 
Default: ``True`` 253 | 254 | Shape:: 255 | - Input: :math:`(N, C, H, W)` 256 | - Output: :math:`(N, C, H, W)` (same shape as input) 257 | 258 | Examples: 259 | >>> # With Learnable Parameters 260 | >>> m = SynchronizedBatchNorm2d(100) 261 | >>> # Without Learnable Parameters 262 | >>> m = SynchronizedBatchNorm2d(100, affine=False) 263 | >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45)) 264 | >>> output = m(input) 265 | """ 266 | 267 | def _check_input_dim(self, input): 268 | if input.dim() != 4: 269 | raise ValueError('expected 4D input (got {}D input)' 270 | .format(input.dim())) 271 | super(SynchronizedBatchNorm2d, self)._check_input_dim(input) 272 | 273 | 274 | class SynchronizedBatchNorm3d(_SynchronizedBatchNorm): 275 | r"""Applies Batch Normalization over a 5d input that is seen as a mini-batch 276 | of 4d inputs 277 | 278 | .. math:: 279 | 280 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta 281 | 282 | This module differs from the built-in PyTorch BatchNorm3d as the mean and 283 | standard-deviation are reduced across all devices during training. 284 | 285 | For example, when one uses `nn.DataParallel` to wrap the network during 286 | training, PyTorch's implementation normalize the tensor on each device using 287 | the statistics only on that device, which accelerated the computation and 288 | is also easy to implement, but the statistics might be inaccurate. 289 | Instead, in this synchronized version, the statistics will be computed 290 | over all training samples distributed on multiple devices. 291 | 292 | Note that, for one-GPU or CPU-only case, this module behaves exactly same 293 | as the built-in PyTorch implementation. 294 | 295 | The mean and standard-deviation are calculated per-dimension over 296 | the mini-batches and gamma and beta are learnable parameter vectors 297 | of size C (where C is the input size). 298 | 299 | During training, this layer keeps a running estimate of its computed mean 300 | and variance. The running sum is kept with a default momentum of 0.1. 301 | 302 | During evaluation, this running mean/variance is used for normalization. 303 | 304 | Because the BatchNorm is done over the `C` dimension, computing statistics 305 | on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm 306 | or Spatio-temporal BatchNorm 307 | 308 | Args: 309 | num_features: num_features from an expected input of 310 | size batch_size x num_features x depth x height x width 311 | eps: a value added to the denominator for numerical stability. 312 | Default: 1e-5 313 | momentum: the value used for the running_mean and running_var 314 | computation. Default: 0.1 315 | affine: a boolean value that when set to ``True``, gives the layer learnable 316 | affine parameters. 
Default: ``True`` 317 | 318 | Shape:: 319 | - Input: :math:`(N, C, D, H, W)` 320 | - Output: :math:`(N, C, D, H, W)` (same shape as input) 321 | 322 | Examples: 323 | >>> # With Learnable Parameters 324 | >>> m = SynchronizedBatchNorm3d(100) 325 | >>> # Without Learnable Parameters 326 | >>> m = SynchronizedBatchNorm3d(100, affine=False) 327 | >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10)) 328 | >>> output = m(input) 329 | """ 330 | 331 | def _check_input_dim(self, input): 332 | if input.dim() != 5: 333 | raise ValueError('expected 5D input (got {}D input)' 334 | .format(input.dim())) 335 | super(SynchronizedBatchNorm3d, self)._check_input_dim(input) 336 | 337 | 338 | @contextlib.contextmanager 339 | def patch_sync_batchnorm(): 340 | import torch.nn as nn 341 | 342 | backup = nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d 343 | 344 | nn.BatchNorm1d = SynchronizedBatchNorm1d 345 | nn.BatchNorm2d = SynchronizedBatchNorm2d 346 | nn.BatchNorm3d = SynchronizedBatchNorm3d 347 | 348 | yield 349 | 350 | nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d = backup 351 | 352 | 353 | def convert_model(module): 354 | """Traverse the input module and its child recursively 355 | and replace all instance of torch.nn.modules.batchnorm.BatchNorm*N*d 356 | to SynchronizedBatchNorm*N*d 357 | 358 | Args: 359 | module: the input module needs to be convert to SyncBN model 360 | 361 | Examples: 362 | >>> import torch.nn as nn 363 | >>> import torchvision 364 | >>> # m is a standard pytorch model 365 | >>> m = torchvision.models.resnet18(True) 366 | >>> m = nn.DataParallel(m) 367 | >>> # after convert, m is using SyncBN 368 | >>> m = convert_model(m) 369 | """ 370 | if isinstance(module, torch.nn.DataParallel): 371 | mod = module.module 372 | mod = convert_model(mod) 373 | mod = DataParallelWithCallback(mod) 374 | return mod 375 | 376 | mod = module 377 | for pth_module, sync_module in zip([torch.nn.modules.batchnorm.BatchNorm1d, 378 | torch.nn.modules.batchnorm.BatchNorm2d, 379 | torch.nn.modules.batchnorm.BatchNorm3d], 380 | [SynchronizedBatchNorm1d, 381 | SynchronizedBatchNorm2d, 382 | SynchronizedBatchNorm3d]): 383 | if isinstance(module, pth_module): 384 | mod = sync_module(module.num_features, module.eps, module.momentum, module.affine) 385 | mod.running_mean = module.running_mean 386 | mod.running_var = module.running_var 387 | if module.affine: 388 | mod.weight.data = module.weight.data.clone().detach() 389 | mod.bias.data = module.bias.data.clone().detach() 390 | for i in mod.parameters(): 391 | i.requires_grad = False 392 | 393 | 394 | for name, child in module.named_children(): 395 | mod.add_module(name, convert_model(child)) 396 | 397 | return mod 398 | --------------------------------------------------------------------------------
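Taken together, these files outline how multi-GPU training could be wired up: the DeepLabv3+ network in model/deeplabv3.py is built with plain nn.BatchNorm2d layers, and convert_model from utils/sync_batchnorm replaces them with SynchronizedBatchNorm2d and returns a DataParallelWithCallback wrapper so batch statistics are reduced across devices. The sketch below is a minimal, hypothetical combination of the two files, not the repository's actual training script; the 21-class Pascal VOC setting, the 321x321 crop size, and the assumption that utils.sync_batchnorm re-exports convert_model in its __init__ are illustrative.

import torch
import torch.nn as nn

from model.deeplabv3 import Res_Deeplab          # ResNet-101 DeepLabv3+ defined above
from utils.sync_batchnorm import convert_model   # assumed to be re-exported by the package __init__

# Build the segmentation network (21 classes is the usual Pascal VOC setting; an assumption here).
model = Res_Deeplab(num_classes=21)

# Wrap in DataParallel first; convert_model then swaps every BatchNorm*d for its synchronized
# counterpart and returns a DataParallelWithCallback wrapper (see its docstring above).
model = nn.DataParallel(model)
model = convert_model(model)
model = model.cuda()

# Forward pass: class-score maps at the decoder's reduced resolution (the decoder does not
# upsample to the input size) plus the 256-channel decoder features returned with return_features=True.
x = torch.rand(4, 3, 321, 321).cuda()
logits, features = model(x, return_features=True)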