├── deeplab-pytorch ├── libs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── caffe_pb2.cpython-36.pyc │ ├── models │ │ ├── __pycache__ │ │ │ ├── msc.cpython-36.pyc │ │ │ ├── resnet.cpython-36.pyc │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── deeplabv1.cpython-36.pyc │ │ │ ├── deeplabv2.cpython-36.pyc │ │ │ ├── deeplabv3.cpython-36.pyc │ │ │ └── deeplabv3plus.cpython-36.pyc │ │ ├── msc.py │ │ ├── deeplabv1.py │ │ ├── deeplabv2.py │ │ ├── __init__.py │ │ ├── deeplabv3plus.py │ │ ├── deeplabv3.py │ │ └── resnet.py │ ├── utils │ │ ├── __pycache__ │ │ │ ├── crf.cpython-36.pyc │ │ │ ├── metric.cpython-36.pyc │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── lr_scheduler.cpython-36.pyc │ │ ├── __init__.py │ │ ├── lr_scheduler.py │ │ ├── crf.py │ │ └── metric.py │ └── datasets │ │ ├── __pycache__ │ │ ├── base.cpython-36.pyc │ │ ├── voc.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ └── cocostuff.cpython-36.pyc │ │ ├── __init__.py │ │ ├── base.py │ │ ├── voc.py │ │ └── cocostuff.py ├── data │ ├── models │ │ ├── coco │ │ │ └── deeplabv1_resnet101 │ │ │ │ └── caffemodel │ │ │ │ └── .gitkeep │ │ └── voc12 │ │ │ └── deeplabv2_resnet101_msc │ │ │ └── caffemodel │ │ │ └── .gitkeep │ └── datasets │ │ ├── voc12 │ │ ├── labels.txt │ │ └── README.md │ │ ├── coco │ │ └── labels.txt │ │ └── cocostuff │ │ ├── README.md │ │ ├── labels.txt │ │ └── cocostuff_hierarchy.yaml ├── scripts │ ├── setup_voc12.sh │ ├── setup_cocostuff10k.sh │ ├── setup_caffemodels.sh │ ├── setup_cocostuff164k.sh │ └── train_eval.sh ├── configs │ ├── conda_env.yaml │ ├── coco.yaml │ ├── voc12.yaml │ ├── cocostuff10k.yaml │ └── cocostuff164k.yaml ├── LICENSE ├── hubconf.py ├── demo.py ├── convert.py └── README.md ├── utils ├── __init__.py ├── transforms │ ├── __init__.py │ ├── __init__.pyc │ ├── functional.pyc │ ├── transforms.pyc │ └── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── functional.cpython-36.pyc │ │ └── transforms.cpython-36.pyc ├── avgMeter.py ├── Metrics.py ├── pyutils.py ├── Restore.py ├── torchutils.py ├── imutils.py ├── LoadData.py └── datasets.py ├── scripts ├── __pycache__ │ └── my_optim.cpython-36.pyc ├── test_iam.py ├── test.py ├── my_optim.py ├── train_iam.py └── train.py ├── test.sh ├── test_iam.sh ├── train.sh ├── train_iam.sh ├── train+.sh ├── res.py ├── runs └── exp1 │ └── res.py ├── models ├── vgg1.py └── vgg.py ├── README.md └── gen_gt.py /deeplab-pytorch/libs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .avgMeter import * 2 | -------------------------------------------------------------------------------- /utils/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import * -------------------------------------------------------------------------------- /deeplab-pytorch/data/models/coco/deeplabv1_resnet101/caffemodel/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/models/voc12/deeplabv2_resnet101_msc/caffemodel/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/transforms/__init__.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/__init__.pyc -------------------------------------------------------------------------------- /utils/transforms/functional.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/functional.pyc -------------------------------------------------------------------------------- /utils/transforms/transforms.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/transforms.pyc -------------------------------------------------------------------------------- /scripts/__pycache__/my_optim.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/scripts/__pycache__/my_optim.cpython-36.pyc -------------------------------------------------------------------------------- /utils/transforms/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/transforms/__pycache__/functional.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/__pycache__/functional.cpython-36.pyc -------------------------------------------------------------------------------- /utils/transforms/__pycache__/transforms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/__pycache__/transforms.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/__pycache__/caffe_pb2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/__pycache__/caffe_pb2.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/msc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/msc.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__pycache__/crf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/utils/__pycache__/crf.cpython-36.pyc -------------------------------------------------------------------------------- 
/deeplab-pytorch/libs/datasets/__pycache__/base.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/datasets/__pycache__/base.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/__pycache__/voc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/datasets/__pycache__/voc.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/resnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/resnet.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__pycache__/metric.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/utils/__pycache__/metric.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/deeplabv1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/deeplabv1.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/deeplabv2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/deeplabv2.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/deeplabv3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/deeplabv3.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .crf import DenseCRF 3 | from .lr_scheduler import PolynomialLR 4 | from .metric import scores 5 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- 
/deeplab-pytorch/libs/datasets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/datasets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/__pycache__/cocostuff.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/datasets/__pycache__/cocostuff.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__pycache__/lr_scheduler.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/utils/__pycache__/lr_scheduler.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/deeplabv3plus.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/deeplabv3plus.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/setup_voc12.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DATASET_DIR=$1 4 | 5 | # Download PASCAL VOC12 (2GB) 6 | wget -nc -P $DATASET_DIR http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 7 | 8 | # Extract images, annotations, etc. 
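# Extraction yields $DATASET_DIR/VOCdevkit/VOC2012/{Annotations,ImageSets,JPEGImages,...};
# that VOC2012 directory is the path to set as DATASET ROOT in configs/voc12.yaml
# (see data/datasets/voc12/README.md).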
9 | tar -xvf $DATASET_DIR/VOCtrainval_11-May-2012.tar -C $DATASET_DIR -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/voc12/labels.txt: -------------------------------------------------------------------------------- 1 | 0 __background__ 2 | 1 aeroplane 3 | 2 bicycle 4 | 3 bird 5 | 4 boat 6 | 5 bottle 7 | 6 bus 8 | 7 car 9 | 8 cat 10 | 9 chair 11 | 10 cow 12 | 11 diningtable 13 | 12 dog 14 | 13 horse 15 | 14 motorbike 16 | 15 person 17 | 16 pottedplant 18 | 17 sheep 19 | 18 sofa 20 | 19 train 21 | 20 tvmonitor -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc import VOC, VOCAug 2 | from .cocostuff import CocoStuff10k, CocoStuff164k 3 | 4 | 5 | def get_dataset(name): 6 | return { 7 | "cocostuff10k": CocoStuff10k, 8 | "cocostuff164k": CocoStuff164k, 9 | "voc": VOC, 10 | "vocaug": VOCAug, 11 | }[name] 12 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/conda_env.yaml: -------------------------------------------------------------------------------- 1 | name: deeplab-pytorch 2 | dependencies: 3 | - click 4 | - conda-forge::pydensecrf 5 | - cudatoolkit=10.2 6 | - matplotlib 7 | - python=3.6 8 | - pytorch::pytorch>1.2.0 9 | - pytorch::torchvision 10 | - pyyaml 11 | - scipy 12 | - tqdm 13 | - pip: 14 | - addict 15 | - black 16 | - joblib 17 | - omegaconf 18 | - opencv-python 19 | - tensorflow 20 | - torchnet 21 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | EXP=exp1 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/test.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --test_list=./data/voc12/train_cls.txt \ 7 | --arch=vgg \ 8 | --batch_size=1 \ 9 | --dataset=pascal_voc \ 10 | --input_size=224 \ 11 | --num_classes=20 \ 12 | --restore_from=./runs/${EXP}/model/pascal_voc_epoch_14.pth \ 13 | --save_dir=./runs/${EXP}/attention/ \ 14 | -------------------------------------------------------------------------------- /test_iam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | EXP=exp2 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/test_iam.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --test_list=./data/voc12/train_cls.txt \ 7 | --arch=vgg1 \ 8 | --batch_size=1 \ 9 | --dataset=pascal_voc \ 10 | --input_size=224 \ 11 | --num_classes=20 \ 12 | --restore_from=./runs/${EXP}/model/pascal_voc_epoch_14.pth \ 13 | --save_dir=./runs/${EXP}/attention/ \ 14 | -------------------------------------------------------------------------------- /utils/avgMeter.py: -------------------------------------------------------------------------------- 1 | class AverageMeter(object): 2 | """Computes and stores the average and current value""" 3 | def __init__(self): 4 | self.reset() 5 | 6 | def reset(self): 7 | self.val = 0 8 | self.avg = 0 9 | self.sum = 0 10 | self.count = 0 11 | 12 | def update(self, val, n=1): 13 | self.val = val 14 | self.sum += val * n 15 | self.count += n 16 | self.avg = self.sum / self.count 17 | 18 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 
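# Stage-1 classification training with online attention accumulation (OAA):
# scripts/train.py learns a 20-class classifier from VOC12 image-level labels
# (train_cls.txt) and writes the accumulated class attention maps to --accu_dir,
# which train_iam.sh (via --att_dir) and res.py read afterwards.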
EXP=exp1 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/train.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --train_list=./data/voc12/train_cls.txt \ 7 | --test_list=./data/voc12/val_cls.txt \ 8 | --epoch=15 \ 9 | --lr=0.001 \ 10 | --batch_size=1 \ 11 | --iter_size=5 \ 12 | --dataset=pascal_voc \ 13 | --input_size=224 \ 14 | --disp_interval=100 \ 15 | --num_classes=20 \ 16 | --num_workers=8 \ 17 | --snapshot_dir=./runs/${EXP}/model/ \ 18 | --accu_dir=./runs/${EXP}/accu_att/ \ 19 | --decay_points='10' 20 | -------------------------------------------------------------------------------- /train_iam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | EXP=exp2 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/train_iam.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --train_list=./data/voc12/train_cls.txt \ 7 | --test_list=./data/voc12/val_cls.txt \ 8 | --epoch=15 \ 9 | --lr=0.001 \ 10 | --batch_size=1 \ 11 | --iter_size=5 \ 12 | --dataset=pascal_voc \ 13 | --input_size=224 \ 14 | --disp_interval=100 \ 15 | --num_classes=20 \ 16 | --num_workers=8 \ 17 | --snapshot_dir=./runs/${EXP}/model/ \ 18 | --att_dir=./runs/exp1/accu_att/ \ 19 | --decay_points='10' 20 | -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/setup_cocostuff10k.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DATASET_DIR=$1 4 | 5 | # Download COCO-Stuff 10k (2GB) 6 | wget -nc -P $DATASET_DIR http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.1.zip 7 | 8 | unzip -n $DATASET_DIR/cocostuff-10k-v1.1.zip -d $DATASET_DIR 9 | 10 | echo =============================================================================================== 11 | echo "Set the path below to \"ROOT:\" in the config/cocostuff10k.yaml:" 12 | echo -e "\033[32m $DATASET_DIR \033[00m" 13 | echo =============================================================================================== -------------------------------------------------------------------------------- /train+.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | EXP=exp3 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/train.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --train_list=./data/voc12/train_cls.txt \ 7 | --test_list=./data/voc12/val_cls.txt \ 8 | --epoch=15 \ 9 | --lr=0.001 \ 10 | --batch_size=1 \ 11 | --iter_size=5 \ 12 | --dataset=pascal_voc \ 13 | --input_size=224 \ 14 | --disp_interval=100 \ 15 | --num_classes=20 \ 16 | --num_workers=8 \ 17 | --snapshot_dir=./runs/${EXP}/model/ \ 18 | --att_dir=./runs/${EXP}/att/ \ 19 | --accu_dir=./runs/${EXP}/accu_att/ \ 20 | --decay_points='10' \ 21 | --drop_layer \ 22 | --drop_rate=0.5 \ 23 | --drop_threshold=0.6 \ 24 | -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/setup_caffemodels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download released caffemodels 4 | wget -nc -P ./data http://liangchiehchen.com/projects/released/deeplab_aspp_resnet101/prototxt_and_model.zip 5 | 6 | unzip -n ./data/prototxt_and_model.zip -d ./data 7 | 8 | # Move caffemodels to data directories 9 | ## MSCOCO 10 | mv ./data/init.caffemodel ./data/models/coco/deeplabv1_resnet101/caffemodel 11 | ## PASCAL VOC 2012 12 | mv ./data/train_iter_20000.caffemodel 
./data/models/voc12/deeplabv2_resnet101_msc/caffemodel 13 | mv ./data/train2_iter_20000.caffemodel ./data/models/voc12/deeplabv2_resnet101_msc/caffemodel 14 | 15 | echo =============================================================================================== 16 | echo "Next, try running script below:" 17 | echo -e "\033[32m python convert.py --dataset coco \033[00m" 18 | echo =============================================================================================== 19 | -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/setup_cocostuff164k.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DATASET_DIR=$1 4 | 5 | # Download COCO-Stuff 164k (20GB+) 6 | wget -nc -P $DATASET_DIR http://images.cocodataset.org/zips/train2017.zip 7 | wget -nc -P $DATASET_DIR http://images.cocodataset.org/zips/val2017.zip 8 | wget -nc -P $DATASET_DIR http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip 9 | 10 | mkdir -p $DATASET_DIR/images 11 | mkdir -p $DATASET_DIR/annotations 12 | unzip -n $DATASET_DIR/train2017.zip -d $DATASET_DIR/images/ 13 | unzip -n $DATASET_DIR/val2017.zip -d $DATASET_DIR/images/ 14 | unzip -n $DATASET_DIR/stuffthingmaps_trainval2017.zip -d $DATASET_DIR/annotations/ 15 | 16 | echo =============================================================================================== 17 | echo "Set the path below to \"ROOT:\" in the config/cocostuff164k.yaml:" 18 | echo -e "\033[32m $DATASET_DIR \033[00m" 19 | echo =============================================================================================== -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 09 January 2019 7 | 8 | 9 | from torch.optim.lr_scheduler import _LRScheduler 10 | 11 | 12 | class PolynomialLR(_LRScheduler): 13 | def __init__(self, optimizer, step_size, iter_max, power, last_epoch=-1): 14 | self.step_size = step_size 15 | self.iter_max = iter_max 16 | self.power = power 17 | super(PolynomialLR, self).__init__(optimizer, last_epoch) 18 | 19 | def polynomial_decay(self, lr): 20 | return lr * (1 - float(self.last_epoch) / self.iter_max) ** self.power 21 | 22 | def get_lr(self): 23 | if ( 24 | (self.last_epoch == 0) 25 | or (self.last_epoch % self.step_size != 0) 26 | or (self.last_epoch > self.iter_max) 27 | ): 28 | return [group["lr"] for group in self.optimizer.param_groups] 29 | return [self.polynomial_decay(lr) for lr in self.base_lrs] 30 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/coco.yaml: -------------------------------------------------------------------------------- 1 | EXP: 2 | ID: coco 3 | OUTPUT_DIR: data 4 | 5 | DATASET: 6 | NAME: coco 7 | ROOT: 8 | LABELS: ./data/datasets/coco/labels.txt 9 | N_CLASSES: 91 10 | IGNORE_LABEL: 11 | SCALES: 12 | SPLIT: 13 | TRAIN: 14 | VAL: 15 | TEST: 16 | 17 | DATALOADER: 18 | NUM_WORKERS: 0 19 | 20 | IMAGE: 21 | MEAN: 22 | R: 122.675 23 | G: 116.669 24 | B: 104.008 25 | SIZE: 26 | BASE: 27 | TRAIN: 28 | TEST: 513 29 | 30 | MODEL: 31 | NAME: DeepLabV1_ResNet101 32 | N_BLOCKS: [3, 4, 23, 3] 33 | ATROUS_RATES: 34 | INIT_MODEL: 35 | 36 | SOLVER: 37 | BATCH_SIZE: 38 | 
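# TRAIN/TEST are per-step batch sizes; ITER_SIZE below presumably accumulates
# gradients over that many steps (effective batch of TRAIN x ITER_SIZE). This is
# an assumption about the training loop in main.py, which is not shown here.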
TRAIN: 5 39 | TEST: 1 40 | ITER_MAX: 100000 41 | ITER_SIZE: 2 42 | ITER_SAVE: 5000 43 | ITER_TB: 100 44 | LR_DECAY: 10 45 | LR: 2.5e-4 46 | MOMENTUM: 0.9 47 | OPTIMIZER: sgd 48 | POLY_POWER: 0.9 49 | WEIGHT_DECAY: 5.0e-4 50 | AVERAGE_LOSS: 20 51 | 52 | CRF: 53 | ITER_MAX: 10 54 | POS_W: 3 55 | POS_XY_STD: 1 56 | BI_W: 4 57 | BI_XY_STD: 67 58 | BI_RGB_STD: 3 59 | -------------------------------------------------------------------------------- /deeplab-pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Kazuto Nakashima 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/train_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | 6 | # 0. Choose from {voc12, cocostuff10k, cocostuff164k} 7 | DATASET=voc12 8 | 9 | 10 | # 1. Train DeepLab v2 on ${DATASET} 11 | python main.py train \ 12 | -c configs/${DATASET}.yaml 13 | 14 | # Trained models are saved into 15 | # data/models/${DATASET}/deeplabv2_resnet101_msc/*/checkpoint_5000.pth 16 | # data/models/${DATASET}/deeplabv2_resnet101_msc/*/checkpoint_10000.pth 17 | # data/models/${DATASET}/deeplabv2_resnet101_msc/*/checkpoint_15000.pth 18 | # ... 19 | 20 | # Tensorboard logs are in data/logs. 21 | 22 | 23 | # 2. Evaluate the model on val set 24 | python main.py test \ 25 | -c configs/${DATASET}.yaml \ 26 | -m data/models/${DATASET}/deeplabv2_resnet101_msc/*/checkpoint_final.pth 27 | 28 | # Validation scores on 4 metrics are saved as 29 | # data/scores/${DATASET}/deeplabv2_resnet101_msc/*/scores.json 30 | 31 | # Logits are saved into 32 | # data/features/${DATASET}/deeplabv2_resnet101_msc/*/logit/... 33 | 34 | 35 | # 3. 
Re-evaluate the model with CRF post-processing 36 | python main.py crf \ 37 | -c configs/${DATASET}.yaml 38 | 39 | # Scores with CRF on 4 metrics are saved as 40 | # data/scores/${DATASET}/deeplabv2_resnet101_msc/*/scores_crf.json -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/crf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 09 January 2019 7 | 8 | 9 | import numpy as np 10 | import pydensecrf.densecrf as dcrf 11 | import pydensecrf.utils as utils 12 | 13 | 14 | class DenseCRF(object): 15 | def __init__(self, iter_max, pos_w, pos_xy_std, bi_w, bi_xy_std, bi_rgb_std): 16 | self.iter_max = iter_max 17 | self.pos_w = pos_w 18 | self.pos_xy_std = pos_xy_std 19 | self.bi_w = bi_w 20 | self.bi_xy_std = bi_xy_std 21 | self.bi_rgb_std = bi_rgb_std 22 | 23 | def __call__(self, image, probmap): 24 | C, H, W = probmap.shape 25 | 26 | U = utils.unary_from_softmax(probmap) 27 | U = np.ascontiguousarray(U) 28 | 29 | image = np.ascontiguousarray(image) 30 | 31 | d = dcrf.DenseCRF2D(W, H, C) 32 | d.setUnaryEnergy(U) 33 | d.addPairwiseGaussian(sxy=self.pos_xy_std, compat=self.pos_w) 34 | d.addPairwiseBilateral( 35 | sxy=self.bi_xy_std, srgb=self.bi_rgb_std, rgbim=image, compat=self.bi_w 36 | ) 37 | 38 | Q = d.inference(self.iter_max) 39 | Q = np.array(Q).reshape((C, H, W)) 40 | 41 | return Q 42 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/voc12.yaml: -------------------------------------------------------------------------------- 1 | EXP: 2 | ID: voc12 3 | OUTPUT_DIR: data 4 | 5 | DATASET: 6 | NAME: vocaug 7 | ROOT: ./../data/VOCdevkit/VOC2012/ 8 | LABELS: ./data/datasets/voc12/labels.txt 9 | N_CLASSES: 21 10 | IGNORE_LABEL: 255 11 | SCALES: [0.5, 0.75, 1.0, 1.25, 1.5] 12 | SPLIT: 13 | TRAIN: train_aug 14 | VAL: val 15 | TEST: test 16 | 17 | DATALOADER: 18 | NUM_WORKERS: 0 19 | 20 | IMAGE: 21 | MEAN: 22 | R: 122.675 23 | G: 116.669 24 | B: 104.008 25 | SIZE: 26 | BASE: # None 27 | TRAIN: 321 28 | TEST: 513 29 | 30 | MODEL: 31 | NAME: DeepLabV2_ResNet101_MSC 32 | N_BLOCKS: [3, 4, 23, 3] 33 | ATROUS_RATES: [6, 12, 18, 24] 34 | INIT_MODEL: ./data/models/coco/deeplabv1_resnet101/caffemodel/deeplabv1_resnet101-coco.pth 35 | 36 | SOLVER: 37 | BATCH_SIZE: 38 | TRAIN: 5 39 | TEST: 1 40 | ITER_MAX: 20000 41 | ITER_SIZE: 2 42 | ITER_SAVE: 5000 43 | ITER_TB: 100 44 | LR_DECAY: 10 45 | LR: 2.5e-4 46 | MOMENTUM: 0.9 47 | OPTIMIZER: sgd 48 | POLY_POWER: 0.9 49 | WEIGHT_DECAY: 5.0e-4 50 | AVERAGE_LOSS: 20 51 | 52 | CRF: 53 | ITER_MAX: 10 54 | POS_W: 3 55 | POS_XY_STD: 1 56 | BI_W: 4 57 | BI_XY_STD: 67 58 | BI_RGB_STD: 3 59 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/cocostuff10k.yaml: -------------------------------------------------------------------------------- 1 | EXP: 2 | ID: cocostuff10k 3 | OUTPUT_DIR: data 4 | 5 | DATASET: 6 | NAME: cocostuff10k 7 | ROOT: /media/kazuto1011/Extra/cocostuff/cocostuff-10k-v1.1 8 | LABELS: ./data/datasets/cocostuff/labels.txt 9 | N_CLASSES: 182 10 | IGNORE_LABEL: 255 11 | SCALES: [0.5, 0.75, 1.0, 1.25, 1.5] 12 | SPLIT: 13 | TRAIN: train 14 | VAL: test 15 | TEST: 16 | 17 | DATALOADER: 18 | NUM_WORKERS: 0 19 | 20 | IMAGE: 21 | MEAN: 22 | R: 122.675 23 | G: 116.669 24 | B: 104.008 25 | SIZE: 26 | BASE: 27 | TRAIN: 321 28 | TEST: 
513 29 | 30 | MODEL: 31 | NAME: DeepLabV2_ResNet101_MSC 32 | N_BLOCKS: [3, 4, 23, 3] 33 | ATROUS_RATES: [6, 12, 18, 24] 34 | INIT_MODEL: data/models/coco/deeplabv1_resnet101/caffemodel/deeplabv1_resnet101-coco.pth 35 | 36 | SOLVER: 37 | BATCH_SIZE: 38 | TRAIN: 5 39 | TEST: 5 40 | ITER_MAX: 20000 41 | ITER_SIZE: 2 42 | ITER_SAVE: 5000 43 | ITER_TB: 100 44 | LR_DECAY: 10 45 | LR: 2.5e-4 46 | MOMENTUM: 0.9 47 | OPTIMIZER: sgd 48 | POLY_POWER: 0.9 49 | WEIGHT_DECAY: 5.0e-4 50 | AVERAGE_LOSS: 20 51 | 52 | CRF: 53 | ITER_MAX: 10 54 | POS_W: 3 55 | POS_XY_STD: 1 56 | BI_W: 4 57 | BI_XY_STD: 67 58 | BI_RGB_STD: 3 59 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/cocostuff164k.yaml: -------------------------------------------------------------------------------- 1 | EXP: 2 | ID: cocostuff164k 3 | OUTPUT_DIR: data 4 | 5 | DATASET: 6 | NAME: cocostuff164k 7 | ROOT: /media/kazuto1011/Extra/cocostuff/cocostuff-164k 8 | LABELS: ./data/datasets/cocostuff/labels.txt 9 | N_CLASSES: 182 10 | IGNORE_LABEL: 255 11 | SCALES: [0.5, 0.75, 1.0, 1.25, 1.5] 12 | SPLIT: 13 | TRAIN: train2017 14 | VAL: val2017 15 | TEST: 16 | 17 | DATALOADER: 18 | NUM_WORKERS: 0 19 | 20 | IMAGE: 21 | MEAN: 22 | R: 122.675 23 | G: 116.669 24 | B: 104.008 25 | SIZE: 26 | BASE: # None 27 | TRAIN: 321 28 | TEST: 513 29 | 30 | MODEL: 31 | NAME: DeepLabV2_ResNet101_MSC 32 | N_BLOCKS: [3, 4, 23, 3] 33 | ATROUS_RATES: [6, 12, 18, 24] 34 | INIT_MODEL: data/models/coco/deeplabv1_resnet101/caffemodel/deeplabv1_resnet101-coco.pth 35 | 36 | SOLVER: 37 | BATCH_SIZE: 38 | TRAIN: 5 39 | TEST: 1 40 | ITER_MAX: 100000 41 | ITER_SIZE: 2 42 | ITER_SAVE: 5000 43 | ITER_TB: 100 44 | LR_DECAY: 10 45 | LR: 2.5e-4 46 | MOMENTUM: 0.9 47 | OPTIMIZER: sgd 48 | POLY_POWER: 0.9 49 | WEIGHT_DECAY: 5.0e-4 50 | AVERAGE_LOSS: 20 51 | 52 | CRF: 53 | ITER_MAX: 10 54 | POS_W: 3 55 | POS_XY_STD: 1 56 | BI_W: 4 57 | BI_XY_STD: 67 58 | BI_RGB_STD: 3 59 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/metric.py: -------------------------------------------------------------------------------- 1 | # Originally written by wkentaro 2 | # https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/utils.py 3 | 4 | import numpy as np 5 | 6 | 7 | def _fast_hist(label_true, label_pred, n_class): 8 | mask = (label_true >= 0) & (label_true < n_class) 9 | hist = np.bincount( 10 | n_class * label_true[mask].astype(int) + label_pred[mask], 11 | minlength=n_class ** 2, 12 | ).reshape(n_class, n_class) 13 | return hist 14 | 15 | 16 | def scores(label_trues, label_preds, n_class): 17 | hist = np.zeros((n_class, n_class)) 18 | for lt, lp in zip(label_trues, label_preds): 19 | hist += _fast_hist(lt.flatten(), lp.flatten(), n_class) 20 | acc = np.diag(hist).sum() / hist.sum() 21 | acc_cls = np.diag(hist) / hist.sum(axis=1) 22 | acc_cls = np.nanmean(acc_cls) 23 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) 24 | valid = hist.sum(axis=1) > 0 # added 25 | mean_iu = np.nanmean(iu[valid]) 26 | freq = hist.sum(axis=1) / hist.sum() 27 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 28 | cls_iu = dict(zip(range(n_class), iu)) 29 | 30 | return { 31 | "Pixel Accuracy": acc, 32 | "Mean Accuracy": acc_cls, 33 | "Frequency Weighted IoU": fwavacc, 34 | "Mean IoU": mean_iu, 35 | "Class IoU": cls_iu, 36 | } 37 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/coco/labels.txt: 
-------------------------------------------------------------------------------- 1 | 0 background 2 | 1 person 3 | 2 bicycle 4 | 3 car 5 | 4 motorcycle 6 | 5 airplane 7 | 6 bus 8 | 7 train 9 | 8 truck 10 | 9 boat 11 | 10 traffic light 12 | 11 fire hydrant 13 | 12 street sign 14 | 13 stop sign 15 | 14 parking meter 16 | 15 bench 17 | 16 bird 18 | 17 cat 19 | 18 dog 20 | 19 horse 21 | 20 sheep 22 | 21 cow 23 | 22 elephant 24 | 23 bear 25 | 24 zebra 26 | 25 giraffe 27 | 26 hat 28 | 27 backpack 29 | 28 umbrella 30 | 29 shoe 31 | 30 eye glasses 32 | 31 handbag 33 | 32 tie 34 | 33 suitcase 35 | 34 frisbee 36 | 35 skis 37 | 36 snowboard 38 | 37 sports ball 39 | 38 kite 40 | 39 baseball bat 41 | 40 baseball glove 42 | 41 skateboard 43 | 42 surfboard 44 | 43 tennis racket 45 | 44 bottle 46 | 45 plate 47 | 46 wine glass 48 | 47 cup 49 | 48 fork 50 | 49 knife 51 | 50 spoon 52 | 51 bowl 53 | 52 banana 54 | 53 apple 55 | 54 sandwich 56 | 55 orange 57 | 56 broccoli 58 | 57 carrot 59 | 58 hot dog 60 | 59 pizza 61 | 60 donut 62 | 61 cake 63 | 62 chair 64 | 63 couch 65 | 64 potted plant 66 | 65 bed 67 | 66 mirror 68 | 67 dining table 69 | 68 window 70 | 69 desk 71 | 70 toilet 72 | 71 door 73 | 72 tv 74 | 73 laptop 75 | 74 mouse 76 | 75 remote 77 | 76 keyboard 78 | 77 cell phone 79 | 78 microwave 80 | 79 oven 81 | 80 toaster 82 | 81 sink 83 | 82 refrigerator 84 | 83 blender 85 | 84 book 86 | 85 clock 87 | 86 vase 88 | 87 scissors 89 | 88 teddy bear 90 | 89 hair drier 91 | 90 toothbrush -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/msc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2018-03-26 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class MSC(nn.Module): 14 | """ 15 | Multi-scale inputs 16 | """ 17 | 18 | def __init__(self, base, scales=None): 19 | super(MSC, self).__init__() 20 | self.base = base 21 | if scales: 22 | self.scales = scales 23 | else: 24 | self.scales = [0.5, 0.75] 25 | 26 | def forward(self, x): 27 | # Original 28 | logits = self.base(x) 29 | _, _, H, W = logits.shape 30 | interp = lambda l: F.interpolate( 31 | l, size=(H, W), mode="bilinear", align_corners=False 32 | ) 33 | 34 | # Scaled 35 | logits_pyramid = [] 36 | for p in self.scales: 37 | h = F.interpolate(x, scale_factor=p, mode="bilinear", align_corners=False) 38 | logits_pyramid.append(self.base(h)) 39 | 40 | # Pixel-wise max 41 | logits_all = [logits] + [interp(l) for l in logits_pyramid] 42 | logits_max = torch.max(torch.stack(logits_all), dim=0)[0] 43 | 44 | if self.training: 45 | return [logits] + logits_pyramid + [logits_max] 46 | else: 47 | return logits_max 48 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/deeplabv1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 19 February 2019 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from .resnet import _ResLayer, _Stem 15 | 16 | 17 | class DeepLabV1(nn.Sequential): 18 | """ 19 | DeepLab v1: Dilated ResNet + 1x1 Conv 20 | Note that this is just a 
container for loading the pretrained COCO model and not mentioned as "v1" in papers. 21 | """ 22 | 23 | def __init__(self, n_classes, n_blocks): 24 | super(DeepLabV1, self).__init__() 25 | ch = [64 * 2 ** p for p in range(6)] 26 | self.add_module("layer1", _Stem(ch[0])) 27 | self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], 1, 1)) 28 | self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], 2, 1)) 29 | self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], 1, 2)) 30 | self.add_module("layer5", _ResLayer(n_blocks[3], ch[4], ch[5], 1, 4)) 31 | self.add_module("fc", nn.Conv2d(2048, n_classes, 1)) 32 | 33 | 34 | if __name__ == "__main__": 35 | model = DeepLabV1(n_classes=21, n_blocks=[3, 4, 23, 3]) 36 | model.eval() 37 | image = torch.randn(1, 3, 513, 513) 38 | 39 | print(model) 40 | print("input:", image.shape) 41 | print("output:", model(image).shape) 42 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/cocostuff/README.md: -------------------------------------------------------------------------------- 1 | # COCO-Stuff 2 | 3 | This is an instruction for setting up COCO-Stuff dataset. 4 | COCO-Stuff 164k is the latest version and recommended. 5 | 6 | ![](../../../docs/datasets/cocostuff.png) 7 | 8 | ## COCO-Stuff 164k 9 | 10 | ### Setup 11 | 12 | 1. Run the script below to download the dataset (20GB+). 13 | 14 | ```sh 15 | $ bash ./scripts/setup_cocostuff164k.sh [PATH TO DOWNLOAD] 16 | ``` 17 | 18 | 2. Set the path to the dataset in ```configs/cocostuff164k.yaml```. 19 | 20 | ```yaml 21 | DATASET: cocostuff164k 22 | ROOT: # <- Write here 23 | ... 24 | ``` 25 | 26 | ### Dataset structure 27 | 28 | ``` 29 | ├── images 30 | │ ├── train2017 31 | │ │ ├── 000000000009.jpg 32 | │ │ └── ... 33 | │ └── val2017 34 | │ ├── 000000000139.jpg 35 | │ └── ... 36 | └── annotations 37 | ├── train2017 38 | │ ├── 000000000009.png 39 | │ └── ... 40 | └── val2017 41 | ├── 000000000139.png 42 | └── ... 43 | ``` 44 | 45 | ## COCO-Stuff 10k 46 | 47 | ### Setup 48 | 49 | 1. Run the script below to download the dataset (2GB). 50 | 51 | ```sh 52 | $ bash ./scripts/setup_cocostuff10k.sh [PATH TO DOWNLOAD] 53 | ``` 54 | 55 | 2. Set the path to the dataset in ```configs/cocostuff10k.yaml```. 56 | 57 | ```yaml 58 | DATASET: cocostuff10k 59 | ROOT: # <- Write here 60 | ... 61 | ``` 62 | 63 | ### Dataset structure 64 | 65 | ``` 66 | ├── images 67 | │ ├── COCO_train2014_000000000077.jpg 68 | │ └── ... 69 | ├── annotations 70 | │ ├── COCO_train2014_000000000077.mat 71 | │ └── ... 
72 | └── imageLists 73 | ├── all.txt 74 | ├── test.txt 75 | └── train.txt 76 | ``` 77 | -------------------------------------------------------------------------------- /deeplab-pytorch/hubconf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 20 December 2018 7 | 8 | from __future__ import print_function 9 | 10 | from torch.hub import load_state_dict_from_url 11 | 12 | model_url_root = "https://github.com/kazuto1011/deeplab-pytorch/releases/download/v1.0/" 13 | model_dict = { 14 | "cocostuff10k": ("deeplabv2_resnet101_msc-cocostuff10k-20000.pth", 182), 15 | "cocostuff164k": ("deeplabv2_resnet101_msc-cocostuff164k-100000.pth", 182), 16 | "voc12": ("deeplabv2_resnet101_msc-vocaug-20000.pth", 21), 17 | } 18 | 19 | 20 | def deeplabv2_resnet101(pretrained=None, n_classes=182, scales=None): 21 | 22 | from libs.models.deeplabv2 import DeepLabV2 23 | from libs.models.msc import MSC 24 | 25 | # Model parameters 26 | n_blocks = [3, 4, 23, 3] 27 | atrous_rates = [6, 12, 18, 24] 28 | if scales is None: 29 | scales = [0.5, 0.75] 30 | 31 | base = DeepLabV2(n_classes=n_classes, n_blocks=n_blocks, atrous_rates=atrous_rates) 32 | model = MSC(base=base, scales=scales) 33 | 34 | # Load pretrained models 35 | if isinstance(pretrained, str): 36 | 37 | assert pretrained in model_dict, list(model_dict.keys()) 38 | expected = model_dict[pretrained][1] 39 | error_message = "Expected: n_classes={}".format(expected) 40 | assert n_classes == expected, error_message 41 | 42 | model_url = model_url_root + model_dict[pretrained][0] 43 | state_dict = load_state_dict_from_url(model_url) 44 | model.load_state_dict(state_dict) 45 | 46 | return model 47 | 48 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/voc12/README.md: -------------------------------------------------------------------------------- 1 | # PASCAL VOC 2012 2 | 3 | This is an instruction for setting up PASCAL VOC dataset. 4 | 5 | ![](../../../docs/datasets/voc12.png) 6 | 7 | 1. Download PASCAL VOC 2012. 8 | 9 | ```sh 10 | $ bash scripts/setup_voc12.sh [PATH TO DOWNLOAD] 11 | ``` 12 | 13 | ``` 14 | /VOCdevkit 15 | └── VOC2012 16 | ├── Annotations 17 | ├── ImageSets 18 | │ └── Segmentation 19 | ├── JPEGImages 20 | ├── SegmentationObject 21 | └── SegmentationClass 22 | ``` 23 | 24 | 2. Add SBD augmentated training data as `SegmentationClassAug`. 25 | 26 | 27 | * Convert by yourself ([here](https://github.com/shelhamer/fcn.berkeleyvision.org/tree/master/data/pascal)). 28 | * Or download pre-converted files ([here](https://github.com/DrSleep/tensorflow-deeplab-resnet#evaluation)). 29 | 30 | 3. Download official image sets as `ImageSets/SegmentationAug`. 31 | 32 | * From https://ucla.app.box.com/s/rd9z2xvwsfpksi7mi08i2xqrj7ab4keb/file/55053033642 33 | * Or https://github.com/kazuto1011/deeplab-pytorch/files/2945588/list.zip 34 | 35 | ```sh 36 | /VOCdevkit 37 | └── VOC2012 38 | ├── Annotations 39 | ├── ImageSets 40 | │ ├── Segmentation 41 | │ └── SegmentationAug # ADDED!! 42 | │ ├── test.txt 43 | │ ├── train_aug.txt 44 | │ ├── train.txt 45 | │ ├── trainval_aug.txt 46 | │ ├── trainval.txt 47 | │ └── val.txt 48 | ├── JPEGImages 49 | ├── SegmentationObject 50 | ├── SegmentationClass 51 | └── SegmentationClassAug # ADDED!! 52 | └── 2007_000032.png 53 | ``` 54 | 55 | 1. Set the path to the dataset in ```configs/voc12.yaml```. 
56 | 57 | ```yaml 58 | DATASET: voc12 59 | ROOT: # <- Write here 60 | ... 61 | ``` 62 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/deeplabv2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-11-19 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from .resnet import _ConvBnReLU, _ResLayer, _Stem 15 | 16 | 17 | class _ASPP(nn.Module): 18 | """ 19 | Atrous spatial pyramid pooling (ASPP) 20 | """ 21 | 22 | def __init__(self, in_ch, out_ch, rates): 23 | super(_ASPP, self).__init__() 24 | for i, rate in enumerate(rates): 25 | self.add_module( 26 | "c{}".format(i), 27 | nn.Conv2d(in_ch, out_ch, 3, 1, padding=rate, dilation=rate, bias=True), 28 | ) 29 | 30 | for m in self.children(): 31 | nn.init.normal_(m.weight, mean=0, std=0.01) 32 | nn.init.constant_(m.bias, 0) 33 | 34 | def forward(self, x): 35 | return sum([stage(x) for stage in self.children()]) 36 | 37 | 38 | class DeepLabV2(nn.Sequential): 39 | """ 40 | DeepLab v2: Dilated ResNet + ASPP 41 | Output stride is fixed at 8 42 | """ 43 | 44 | def __init__(self, n_classes, n_blocks, atrous_rates): 45 | super(DeepLabV2, self).__init__() 46 | ch = [64 * 2 ** p for p in range(6)] 47 | self.add_module("layer1", _Stem(ch[0])) 48 | self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], 1, 1)) 49 | self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], 2, 1)) 50 | self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], 1, 2)) 51 | self.add_module("layer5", _ResLayer(n_blocks[3], ch[4], ch[5], 1, 4)) 52 | self.add_module("aspp", _ASPP(ch[5], n_classes, atrous_rates)) 53 | 54 | def freeze_bn(self): 55 | for m in self.modules(): 56 | if isinstance(m, _ConvBnReLU.BATCH_NORM): 57 | m.eval() 58 | 59 | 60 | if __name__ == "__main__": 61 | model = DeepLabV2( 62 | n_classes=21, n_blocks=[3, 4, 23, 3], atrous_rates=[6, 12, 18, 24] 63 | ) 64 | model.eval() 65 | image = torch.randn(1, 3, 513, 513) 66 | 67 | print(model) 68 | print("input:", image.shape) 69 | print("output:", model(image).shape) 70 | -------------------------------------------------------------------------------- /res.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import logging 4 | import os 5 | from os.path import exists 6 | import matplotlib as mpl 7 | import matplotlib.pyplot as plt 8 | 9 | colormaps = ['#000000', '#7F0000', '#007F00', '#7F7F00', '#00007F', '#7F007F', '#007F7F', '#7F7F7F', '#3F0000', '#BF0000', '#3F7F00', 10 | '#BF7F00', '#3F007F', '#BF007F', '#3F7F7F', '#BF7F7F', '#003F00', '#7F3F00', '#00BF00', '#7FBF00', '#003F7F'] 11 | 12 | def colormap(index): 13 | return mpl.colors.LinearSegmentedColormap.from_list('cmap', [colormaps[0], colormaps[index+1], '#FFFFFF'], 256) 14 | 15 | def load_dataset(test_lst): 16 | logging.info('Beginning loading dataset...') 17 | im_lst = [] 18 | label_lst = [] 19 | with open(test_lst) as f: 20 | test_names = f.readlines() 21 | lines = open(test_lst).read().splitlines() 22 | for line in lines: 23 | fields = line.split() 24 | im_name = fields[0] 25 | im_labels = [] 26 | for i in range(len(fields)-1): 27 | im_labels.append(int(fields[i+1])) 28 | im_lst.append(im_name) 29 | label_lst.append(im_labels) 30 | 
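# each line of train_cls.txt is "<image id> <class idx> <class idx> ...", so
# im_lst collects image ids and label_lst the per-image lists of class indices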
return im_lst, label_lst 31 | 32 | if __name__ == '__main__': 33 | 34 | train_lst = '/home/ubuntu/Project/datasets/VOCdevkit/VOC2012/ImageSets/Segmentation/train_cls.txt' 35 | root_folder = '/home/ubuntu/Project/datasets/VOCdevkit/VOC2012' 36 | im_lst, label_lst = load_dataset(train_lst) 37 | 38 | atten_path = './runs/exp3/accu_att' 39 | save_path = './runs/exp3/accu_att_zoom' 40 | if not exists(save_path): 41 | os.mkdir(save_path) 42 | for i in range(len(im_lst)): 43 | im_name = '{}/JPEGImages/{}.jpg'.format(root_folder, im_lst[i]) 44 | im_labels = label_lst[i] 45 | 46 | img = cv2.imread(im_name) 47 | height, width = img.shape[:2] 48 | im_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 49 | 50 | for label in im_labels: 51 | att_name = '{}/{}_{}.png'.format(atten_path, i, label) 52 | if not exists(att_name): 53 | continue 54 | att = cv2.imread(att_name, 0) 55 | 56 | 57 | att = cv2.resize(att, (width, height), interpolation=cv2.INTER_CUBIC) 58 | min_value = np.min(att) 59 | max_value = np.max(att) 60 | att = (att - min_value) / (max_value - min_value + 1e-8) 61 | att = np.array(att*255, dtype = np.uint8) 62 | 63 | #att = im_gray * 0.2 + att * 0.8 64 | save_name = '{}/{}_{}.png'.format(save_path, im_lst[i], label) 65 | #plt.imsave(save_name, att, cmap=plt.cm.jet) 66 | #plt.imsave(save_name, att, cmap=colormap(label)) 67 | cv2.imwrite(save_name, att) 68 | 69 | 70 | -------------------------------------------------------------------------------- /runs/exp1/res.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import logging 4 | import os 5 | from os.path import exists 6 | import matplotlib as mpl 7 | import matplotlib.pyplot as plt 8 | 9 | colormaps = ['#000000', '#7F0000', '#007F00', '#7F7F00', '#00007F', '#7F007F', '#007F7F', '#7F7F7F', '#3F0000', '#BF0000', '#3F7F00', 10 | '#BF7F00', '#3F007F', '#BF007F', '#3F7F7F', '#BF7F7F', '#003F00', '#7F3F00', '#00BF00', '#7FBF00', '#003F7F'] 11 | 12 | def colormap(index): 13 | return mpl.colors.LinearSegmentedColormap.from_list('cmap', [colormaps[0], colormaps[index+1], '#FFFFFF'], 256) 14 | 15 | def load_dataset(test_lst): 16 | logging.info('Beginning loading dataset...') 17 | im_lst = [] 18 | label_lst = [] 19 | with open(test_lst) as f: 20 | test_names = f.readlines() 21 | lines = open(test_lst).read().splitlines() 22 | for line in lines: 23 | fields = line.split() 24 | im_name = fields[0] 25 | im_labels = [] 26 | for i in range(len(fields)-1): 27 | im_labels.append(int(fields[i+1])) 28 | im_lst.append(im_name) 29 | label_lst.append(im_labels) 30 | return im_lst, label_lst 31 | 32 | if __name__ == '__main__': 33 | 34 | train_lst = '/home/miao/Projects/Classification/data/VOCdevkit/VOC2012/ImageSets/Segmentation/train_cls.txt' 35 | root_folder = '/home/miao/Projects/Classification/data/VOCdevkit/VOC2012' 36 | im_lst, label_lst = load_dataset(train_lst) 37 | 38 | atten_path = 'accu_att' 39 | save_path = 'accu_att_zoom' 40 | if not exists(save_path): 41 | os.mkdir(save_path) 42 | for i in range(len(im_lst)): 43 | im_name = '{}/JPEGImages/{}.jpg'.format(root_folder, im_lst[i]) 44 | im_labels = label_lst[i] 45 | 46 | img = cv2.imread(im_name) 47 | height, width = img.shape[:2] 48 | im_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 49 | 50 | for label in im_labels: 51 | att_name = '{}/{}_{}.png'.format(atten_path, i, label) 52 | if not exists(att_name): 53 | continue 54 | att = cv2.imread(att_name, 0) 55 | 56 | 57 | att = cv2.resize(att, (width, height), 
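# upsample the saved attention map back to the original image resolution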
interpolation=cv2.INTER_CUBIC) 58 | min_value = np.min(att) 59 | max_value = np.max(att) 60 | att = (att - min_value) / (max_value - min_value + 1e-8) 61 | att = np.array(att*255, dtype = np.uint8) 62 | 63 | att = im_gray * 0.2 + att * 0.8 64 | save_name = '{}/{}_{}.png'.format(save_path, im_lst[i], label) 65 | #plt.imsave(save_name, att, cmap=plt.cm.jet) 66 | plt.imsave(save_name, att, cmap=colormap(label)) 67 | #cv2.imwrite(save_name, att) 68 | 69 | 70 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .resnet import * 3 | from .deeplabv1 import * 4 | from .deeplabv2 import * 5 | from .deeplabv3 import * 6 | from .deeplabv3plus import * 7 | from .msc import * 8 | 9 | 10 | def init_weights(module): 11 | if isinstance(module, nn.Conv2d): 12 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 13 | if module.bias is not None: 14 | nn.init.constant_(module.bias, 0) 15 | elif isinstance(module, nn.Linear): 16 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 17 | if module.bias is not None: 18 | nn.init.constant_(module.bias, 0) 19 | elif isinstance(module, nn.BatchNorm2d): 20 | nn.init.constant_(module.weight, 1) 21 | if module.bias is not None: 22 | nn.init.constant_(module.bias, 0) 23 | 24 | 25 | def ResNet101(n_classes): 26 | return ResNet(n_classes=n_classes, n_blocks=[3, 4, 23, 3]) 27 | 28 | 29 | def DeepLabV1_ResNet101(n_classes): 30 | return DeepLabV1(n_classes=n_classes, n_blocks=[3, 4, 23, 3]) 31 | 32 | 33 | def DeepLabV2_ResNet101_MSC(n_classes): 34 | return MSC( 35 | base=DeepLabV2( 36 | n_classes=n_classes, n_blocks=[3, 4, 23, 3], atrous_rates=[6, 12, 18, 24] 37 | ), 38 | scales=[0.5, 0.75], 39 | ) 40 | 41 | 42 | def DeepLabV2S_ResNet101_MSC(n_classes): 43 | return MSC( 44 | base=DeepLabV2( 45 | n_classes=n_classes, n_blocks=[3, 4, 23, 3], atrous_rates=[3, 6, 9, 12] 46 | ), 47 | scales=[0.5, 0.75], 48 | ) 49 | 50 | 51 | def DeepLabV3_ResNet101_MSC(n_classes, output_stride=16): 52 | if output_stride == 16: 53 | atrous_rates = [6, 12, 18] 54 | elif output_stride == 8: 55 | atrous_rates = [12, 24, 36] 56 | else: 57 | NotImplementedError 58 | 59 | base = DeepLabV3( 60 | n_classes=n_classes, 61 | n_blocks=[3, 4, 23, 3], 62 | atrous_rates=atrous_rates, 63 | multi_grids=[1, 2, 4], 64 | output_stride=output_stride, 65 | ) 66 | 67 | for name, module in base.named_modules(): 68 | if ".bn" in name: 69 | module.momentum = 0.9997 70 | 71 | return MSC(base=base, scales=[0.5, 0.75]) 72 | 73 | 74 | def DeepLabV3Plus_ResNet101_MSC(n_classes, output_stride=16): 75 | if output_stride == 16: 76 | atrous_rates = [6, 12, 18] 77 | elif output_stride == 8: 78 | atrous_rates = [12, 24, 36] 79 | else: 80 | NotImplementedError 81 | 82 | base = DeepLabV3Plus( 83 | n_classes=n_classes, 84 | n_blocks=[3, 4, 23, 3], 85 | atrous_rates=atrous_rates, 86 | multi_grids=[1, 2, 4], 87 | output_stride=output_stride, 88 | ) 89 | 90 | for name, module in base.named_modules(): 91 | if ".bn" in name: 92 | module.momentum = 0.9997 93 | 94 | return MSC(base=base, scales=[0.5, 0.75]) 95 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/deeplabv3plus.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto 
Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2018-03-26 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | from collections import OrderedDict 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | from .deeplabv3 import _ASPP 17 | from .resnet import _ConvBnReLU, _ResLayer, _Stem 18 | 19 | 20 | class DeepLabV3Plus(nn.Module): 21 | """ 22 | DeepLab v3+: Dilated ResNet with multi-grid + improved ASPP + decoder 23 | """ 24 | 25 | def __init__(self, n_classes, n_blocks, atrous_rates, multi_grids, output_stride): 26 | super(DeepLabV3Plus, self).__init__() 27 | 28 | # Stride and dilation 29 | if output_stride == 8: 30 | s = [1, 2, 1, 1] 31 | d = [1, 1, 2, 4] 32 | elif output_stride == 16: 33 | s = [1, 2, 2, 1] 34 | d = [1, 1, 1, 2] 35 | 36 | # Encoder 37 | ch = [64 * 2 ** p for p in range(6)] 38 | self.layer1 = _Stem(ch[0]) 39 | self.layer2 = _ResLayer(n_blocks[0], ch[0], ch[2], s[0], d[0]) 40 | self.layer3 = _ResLayer(n_blocks[1], ch[2], ch[3], s[1], d[1]) 41 | self.layer4 = _ResLayer(n_blocks[2], ch[3], ch[4], s[2], d[2]) 42 | self.layer5 = _ResLayer(n_blocks[3], ch[4], ch[5], s[3], d[3], multi_grids) 43 | self.aspp = _ASPP(ch[5], 256, atrous_rates) 44 | concat_ch = 256 * (len(atrous_rates) + 2) 45 | self.add_module("fc1", _ConvBnReLU(concat_ch, 256, 1, 1, 0, 1)) 46 | 47 | # Decoder 48 | self.reduce = _ConvBnReLU(256, 48, 1, 1, 0, 1) 49 | self.fc2 = nn.Sequential( 50 | OrderedDict( 51 | [ 52 | ("conv1", _ConvBnReLU(304, 256, 3, 1, 1, 1)), 53 | ("conv2", _ConvBnReLU(256, 256, 3, 1, 1, 1)), 54 | ("conv3", nn.Conv2d(256, n_classes, kernel_size=1)), 55 | ] 56 | ) 57 | ) 58 | 59 | def forward(self, x): 60 | h = self.layer1(x) 61 | h = self.layer2(h) 62 | h_ = self.reduce(h) 63 | h = self.layer3(h) 64 | h = self.layer4(h) 65 | h = self.layer5(h) 66 | h = self.aspp(h) 67 | h = self.fc1(h) 68 | h = F.interpolate(h, size=h_.shape[2:], mode="bilinear", align_corners=False) 69 | h = torch.cat((h, h_), dim=1) 70 | h = self.fc2(h) 71 | h = F.interpolate(h, size=x.shape[2:], mode="bilinear", align_corners=False) 72 | return h 73 | 74 | 75 | if __name__ == "__main__": 76 | model = DeepLabV3Plus( 77 | n_classes=21, 78 | n_blocks=[3, 4, 23, 3], 79 | atrous_rates=[6, 12, 18], 80 | multi_grids=[1, 2, 4], 81 | output_stride=16, 82 | ) 83 | model.eval() 84 | image = torch.randn(1, 3, 513, 513) 85 | 86 | print(model) 87 | print("input:", image.shape) 88 | print("output:", model(image).shape) 89 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/cocostuff/labels.txt: -------------------------------------------------------------------------------- 1 | 0 person 2 | 1 bicycle 3 | 2 car 4 | 3 motorcycle 5 | 4 airplane 6 | 5 bus 7 | 6 train 8 | 7 truck 9 | 8 boat 10 | 9 traffic light 11 | 10 fire hydrant 12 | 11 street sign 13 | 12 stop sign 14 | 13 parking meter 15 | 14 bench 16 | 15 bird 17 | 16 cat 18 | 17 dog 19 | 18 horse 20 | 19 sheep 21 | 20 cow 22 | 21 elephant 23 | 22 bear 24 | 23 zebra 25 | 24 giraffe 26 | 25 hat 27 | 26 backpack 28 | 27 umbrella 29 | 28 shoe 30 | 29 eye glasses 31 | 30 handbag 32 | 31 tie 33 | 32 suitcase 34 | 33 frisbee 35 | 34 skis 36 | 35 snowboard 37 | 36 sports ball 38 | 37 kite 39 | 38 baseball bat 40 | 39 baseball glove 41 | 40 skateboard 42 | 41 surfboard 43 | 42 tennis racket 44 | 43 bottle 45 | 44 plate 46 | 45 wine glass 47 | 46 cup 48 | 47 fork 49 | 48 knife 50 | 49 spoon 51 | 50 bowl 52 | 51 banana 53 | 52 apple 54 | 53 sandwich 55 | 54 
orange 56 | 55 broccoli 57 | 56 carrot 58 | 57 hot dog 59 | 58 pizza 60 | 59 donut 61 | 60 cake 62 | 61 chair 63 | 62 couch 64 | 63 potted plant 65 | 64 bed 66 | 65 mirror 67 | 66 dining table 68 | 67 window 69 | 68 desk 70 | 69 toilet 71 | 70 door 72 | 71 tv 73 | 72 laptop 74 | 73 mouse 75 | 74 remote 76 | 75 keyboard 77 | 76 cell phone 78 | 77 microwave 79 | 78 oven 80 | 79 toaster 81 | 80 sink 82 | 81 refrigerator 83 | 82 blender 84 | 83 book 85 | 84 clock 86 | 85 vase 87 | 86 scissors 88 | 87 teddy bear 89 | 88 hair drier 90 | 89 toothbrush 91 | 90 hair brush 92 | 91 banner 93 | 92 blanket 94 | 93 branch 95 | 94 bridge 96 | 95 building-other 97 | 96 bush 98 | 97 cabinet 99 | 98 cage 100 | 99 cardboard 101 | 100 carpet 102 | 101 ceiling-other 103 | 102 ceiling-tile 104 | 103 cloth 105 | 104 clothes 106 | 105 clouds 107 | 106 counter 108 | 107 cupboard 109 | 108 curtain 110 | 109 desk-stuff 111 | 110 dirt 112 | 111 door-stuff 113 | 112 fence 114 | 113 floor-marble 115 | 114 floor-other 116 | 115 floor-stone 117 | 116 floor-tile 118 | 117 floor-wood 119 | 118 flower 120 | 119 fog 121 | 120 food-other 122 | 121 fruit 123 | 122 furniture-other 124 | 123 grass 125 | 124 gravel 126 | 125 ground-other 127 | 126 hill 128 | 127 house 129 | 128 leaves 130 | 129 light 131 | 130 mat 132 | 131 metal 133 | 132 mirror-stuff 134 | 133 moss 135 | 134 mountain 136 | 135 mud 137 | 136 napkin 138 | 137 net 139 | 138 paper 140 | 139 pavement 141 | 140 pillow 142 | 141 plant-other 143 | 142 plastic 144 | 143 platform 145 | 144 playingfield 146 | 145 railing 147 | 146 railroad 148 | 147 river 149 | 148 road 150 | 149 rock 151 | 150 roof 152 | 151 rug 153 | 152 salad 154 | 153 sand 155 | 154 sea 156 | 155 shelf 157 | 156 sky-other 158 | 157 skyscraper 159 | 158 snow 160 | 159 solid-other 161 | 160 stairs 162 | 161 stone 163 | 162 straw 164 | 163 structural-other 165 | 164 table 166 | 165 tent 167 | 166 textile-other 168 | 167 towel 169 | 168 tree 170 | 169 vegetable 171 | 170 wall-brick 172 | 171 wall-concrete 173 | 172 wall-other 174 | 173 wall-panel 175 | 174 wall-stone 176 | 175 wall-tile 177 | 176 wall-wood 178 | 177 water-other 179 | 178 waterdrops 180 | 179 window-blind 181 | 180 window-other 182 | 181 wood -------------------------------------------------------------------------------- /utils/Metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import numpy as np 4 | 5 | def accuracy(logits, target, topk=(1,)): 6 | ''' 7 | Compute the top k accuracy of classification results. 8 | :param target: the ground truth label 9 | :param topk: tuple or list of the expected k values. 10 | :return: A list of the accuracy values. 
The list has the same length as topk. 11 | ''' 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | scores = logits 15 | 16 | _, pred = scores.topk(maxk, 1, True, True) 17 | pred = pred.t() 18 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 19 | 20 | res = [] 21 | for k in topk: 22 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 23 | res.append(correct_k.mul_(100.0 / batch_size)) 24 | return res 25 | 26 | 27 | from sklearn import metrics 28 | def get_mAP(gt_labels, pred_scores): 29 | n_classes = np.shape(gt_labels)[1] 30 | results = [] 31 | for i in range(n_classes): 32 | res = metrics.average_precision_score(gt_labels[:,i], pred_scores[:,i]) 33 | results.append(res) 34 | 35 | results = ['%.3f' % x for x in results] 36 | cls_map = np.array(list(map(float, results))) 37 | return cls_map 38 | 39 | def get_AUC(gt_labels, pred_scores): 40 | res = metrics.roc_auc_score(gt_labels, pred_scores) 41 | return res 42 | 43 | def _to_numpy(v): 44 | v = torch.squeeze(v) 45 | if torch.is_tensor(v): 46 | v = v.cpu() 47 | v = v.numpy() 48 | elif isinstance(v, torch.autograd.Variable): 49 | v = v.cpu().data.numpy() 50 | 51 | return v 52 | 53 | def get_iou(pred, gt): 54 | ''' 55 | IoU averaged over images 56 | :param pred: 57 | :param gt: 58 | :return: 59 | ''' 60 | pred = _to_numpy(pred) 61 | gt = _to_numpy(gt) 62 | pred[gt==255] = 255 63 | 64 | assert pred.shape == gt.shape 65 | 66 | gt = gt.astype(np.float32) 67 | pred = pred.astype(np.float32) 68 | 69 | # max_label = int(args['--NoLabels']) - 1 # labels from 0,1, ... 20(for VOC) 70 | count = np.zeros((20 + 1,)) 71 | for j in range(20 + 1): 72 | x = np.where(pred == j) 73 | p_idx_j = set(zip(x[0].tolist(), x[1].tolist())) 74 | x = np.where(gt == j) 75 | GT_idx_j = set(zip(x[0].tolist(), x[1].tolist())) 76 | # pdb.set_trace() 77 | n_jj = set.intersection(p_idx_j, GT_idx_j) 78 | u_jj = set.union(p_idx_j, GT_idx_j) 79 | 80 | if len(GT_idx_j) != 0: 81 | count[j] = float(len(n_jj)) / float(len(u_jj)) 82 | 83 | result_class = count 84 | unique_classes = len(np.unique(gt))-1 if 255 in np.unique(gt).tolist() else len(np.unique(gt)) 85 | # unique_classes = len(np.unique(gt)) 86 | Aiou = np.sum(result_class[:]) / float(unique_classes) 87 | 88 | return Aiou 89 | 90 | def fast_hist(pred, gt, n=21): 91 | pred = _to_numpy(pred) 92 | gt = _to_numpy(gt) 93 | k = (gt >= 0) & (gt < n) 94 | return np.bincount(n * pred[k].astype(int) + gt[k], minlength=n**2).reshape(n, n) 95 | 96 | def get_voc_iou(hist): 97 | miou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) 98 | return miou 99 | 100 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/deeplabv3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2018-03-26 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | from collections import OrderedDict 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | from .resnet import _ConvBnReLU, _ResLayer, _Stem 17 | 18 | 19 | class _ImagePool(nn.Module): 20 | def __init__(self, in_ch, out_ch): 21 | super().__init__() 22 | self.pool = nn.AdaptiveAvgPool2d(1) 23 | self.conv = _ConvBnReLU(in_ch, out_ch, 1, 1, 0, 1) 24 | 25 | def forward(self, x): 26 | _, _, H, W = x.shape 27 | h = self.pool(x) 28 | h = self.conv(h) 29 | h = F.interpolate(h,
size=(H, W), mode="bilinear", align_corners=False) 30 | return h 31 | 32 | 33 | class _ASPP(nn.Module): 34 | """ 35 | Atrous spatial pyramid pooling with image-level feature 36 | """ 37 | 38 | def __init__(self, in_ch, out_ch, rates): 39 | super(_ASPP, self).__init__() 40 | self.stages = nn.Module() 41 | self.stages.add_module("c0", _ConvBnReLU(in_ch, out_ch, 1, 1, 0, 1)) 42 | for i, rate in enumerate(rates): 43 | self.stages.add_module( 44 | "c{}".format(i + 1), 45 | _ConvBnReLU(in_ch, out_ch, 3, 1, padding=rate, dilation=rate), 46 | ) 47 | self.stages.add_module("imagepool", _ImagePool(in_ch, out_ch)) 48 | 49 | def forward(self, x): 50 | return torch.cat([stage(x) for stage in self.stages.children()], dim=1) 51 | 52 | 53 | class DeepLabV3(nn.Sequential): 54 | """ 55 | DeepLab v3: Dilated ResNet with multi-grid + improved ASPP 56 | """ 57 | 58 | def __init__(self, n_classes, n_blocks, atrous_rates, multi_grids, output_stride): 59 | super(DeepLabV3, self).__init__() 60 | 61 | # Stride and dilation 62 | if output_stride == 8: 63 | s = [1, 2, 1, 1] 64 | d = [1, 1, 2, 4] 65 | elif output_stride == 16: 66 | s = [1, 2, 2, 1] 67 | d = [1, 1, 1, 2] 68 | 69 | ch = [64 * 2 ** p for p in range(6)] 70 | self.add_module("layer1", _Stem(ch[0])) 71 | self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], s[0], d[0])) 72 | self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], s[1], d[1])) 73 | self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], s[2], d[2])) 74 | self.add_module( 75 | "layer5", _ResLayer(n_blocks[3], ch[4], ch[5], s[3], d[3], multi_grids) 76 | ) 77 | self.add_module("aspp", _ASPP(ch[5], 256, atrous_rates)) 78 | concat_ch = 256 * (len(atrous_rates) + 2) 79 | self.add_module("fc1", _ConvBnReLU(concat_ch, 256, 1, 1, 0, 1)) 80 | self.add_module("fc2", nn.Conv2d(256, n_classes, kernel_size=1)) 81 | 82 | 83 | if __name__ == "__main__": 84 | model = DeepLabV3( 85 | n_classes=21, 86 | n_blocks=[3, 4, 23, 3], 87 | atrous_rates=[6, 12, 18], 88 | multi_grids=[1, 2, 4], 89 | output_stride=8, 90 | ) 91 | model.eval() 92 | image = torch.randn(1, 3, 513, 513) 93 | 94 | print(model) 95 | print("input:", image.shape) 96 | print("output:", model(image).shape) 97 | -------------------------------------------------------------------------------- /models/vgg1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | import torch.nn.functional as F 5 | import math 6 | import cv2 7 | import numpy as np 8 | import os 9 | 10 | model_urls = {'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth'} 11 | 12 | class VGG(nn.Module): 13 | 14 | def __init__(self, features, num_classes=20, init_weights=True): 15 | super(VGG, self).__init__() 16 | self.features = features 17 | self.extra_convs = nn.Sequential( 18 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 19 | nn.ReLU(True), 20 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 21 | nn.ReLU(True), 22 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 23 | nn.ReLU(True), 24 | nn.Conv2d(512,num_classes,1) 25 | ) 26 | self._initialize_weights() 27 | 28 | def forward(self, x): 29 | x = self.features(x) 30 | x = self.extra_convs(x) 31 | return x 32 | 33 | def _initialize_weights(self): 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 37 | #m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 38 | m.weight.data.normal_(0, 0.01) 39 | if m.bias is not None: 40 | m.bias.data.zero_() 41 | elif isinstance(m, nn.BatchNorm2d): 42 | m.weight.data.fill_(1) 43 | m.bias.data.zero_() 44 | elif isinstance(m, nn.Linear): 45 | m.weight.data.normal_(0, 0.01) 46 | m.bias.data.zero_() 47 | 48 | def get_parameter_groups(self): 49 | groups = ([], [], [], []) 50 | 51 | for name, value in self.named_parameters(): 52 | 53 | if 'extra' in name: 54 | if 'weight' in name: 55 | groups[2].append(value) 56 | else: 57 | groups[3].append(value) 58 | else: 59 | if 'weight' in name: 60 | groups[0].append(value) 61 | else: 62 | groups[1].append(value) 63 | print(groups[2]) 64 | return groups 65 | 66 | 67 | def make_layers(cfg, batch_norm=False): 68 | layers = [] 69 | in_channels = 3 70 | for i, v in enumerate(cfg): 71 | if v == 'M': 72 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 73 | elif v == 'N': 74 | layers += [nn.MaxPool2d(kernel_size=3, stride=1, padding=1)] 75 | else: 76 | if i > 13: 77 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, dilation=2, padding=2) 78 | else: 79 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 80 | if batch_norm: 81 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 82 | else: 83 | layers += [conv2d, nn.ReLU(inplace=True)] 84 | in_channels = v 85 | return nn.Sequential(*layers) 86 | 87 | 88 | cfg = { 89 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 90 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 91 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 92 | 'D1': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'N', 512, 512, 512], 93 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 94 | } 95 | 96 | 97 | def vgg16(pretrained=False, **kwargs): 98 | model = VGG(make_layers(cfg['D1']), **kwargs) 99 | if pretrained: 100 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16']), strict=False) 101 | return model 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OAA-PyTorch 2 | The official PyTorch code for ["Integral Object Mining via Online Attention Accumulation"](http://openaccess.thecvf.com/content_ICCV_2019/papers/Jiang_Integral_Object_Mining_via_Online_Attention_Accumulation_ICCV_2019_paper.pdf), implemented based on the code of [psa](https://github.com/jiwoon-ahn/psa) and [ACoL](https://github.com/xiaomengyc/ACoL). The segmentation framework is borrowed from [deeplab-pytorch](https://github.com/kazuto1011/deeplab-pytorch). 3 | 4 | ## Installation 5 | python3 6 | torch >= 1.0 7 | tqdm 8 | torchvision 9 | opencv-python 10 | 11 | Download the [VOCdevkit.tar.gz](https://drive.google.com/file/d/1jnHE6Sau0tHI7X6JQKhzHov-vseYbrf9/view?usp=sharing) file and extract it into the data/ folder. 12 | 13 | ## Online Attention Accumulation 14 | ``` 15 | cd OAA-PyTorch/ 16 | ./train.sh 17 | ``` 18 | After the training process, you can resize the accumulated attention maps to the original image size.
19 | ``` 20 | python res.py 21 | ``` 22 | To compare with the attention maps generated by the final classification model, you can generate those maps by 23 | ``` 24 | ./test.sh 25 | ``` 26 | 27 | ## Integral Attention Learning 28 | If you want to skip the online attention accumulation process and train the integral model directly, download the [pre-accumulated maps](https://drive.google.com/file/d/171hBXJu1Ty8eqiPtdqgZlR0D980WVBnr/view?usp=sharing) and extract them to `exp1/`. 29 | ``` 30 | ./train_iam.sh 31 | ./test_iam.sh 32 | ``` 33 | 34 | ## Attention Drop Layer 35 | ``` 36 | ./train+.sh 37 | ``` 38 | After the training process, you can resize the accumulated attention maps to the original image size. 39 | ``` 40 | python res.py 41 | ``` 42 | 43 | 44 | ## Weakly Supervised Segmentation 45 | To train a segmentation model, you first need to generate pseudo segmentation labels by 46 | ``` 47 | python gen_gt.py 48 | ``` 49 | This script will generate pseudo segmentation labels in './data/VOCdevkit/VOC2012/proxy-gt/'. 50 | Then you can train the [deeplab-pytorch](https://github.com/kazuto1011/deeplab-pytorch) model as follows: 51 | ``` 52 | cd deeplab-pytorch 53 | bash scripts/setup_caffemodels.sh 54 | python convert.py --dataset coco 55 | python convert.py --dataset voc12 56 | ``` 57 | Train the segmentation model by 58 | ``` 59 | python main.py train \ 60 | --config-path configs/voc12.yaml 61 | ``` 62 | Test the segmentation model by 63 | ``` 64 | python main.py test \ 65 | --config-path configs/voc12.yaml \ 66 | --model-path data/models/voc12/deeplabv2_resnet101_msc/train_aug/checkpoint_final.pth 67 | ``` 68 | Apply CRF post-processing by 69 | ``` 70 | python main.py crf \ 71 | --config-path configs/voc12.yaml 72 | ``` 73 | ## Performance 74 | Method | mIoU | mIoU (CRF) 75 | --- |:---:|:---: 76 | OAA | 65.7 | 66.9 77 | OAA+ | 66.6 | 67.8 78 | OAA-drop | 67.5 | 68.8 79 | 80 | If you have any questions about OAA, please feel free to contact [me](https://pengtaojiang.github.io/) (pt.jiang AT mail DOT nankai.edu.cn). 81 | 82 | ## Citation 83 | If you use this code or these models in your research, please cite: 84 | ``` 85 | @inproceedings{jiang2019integral, 86 | title={Integral Object Mining via Online Attention Accumulation}, 87 | author={Jiang, Peng-Tao and Hou, Qibin and Cao, Yang and Cheng, Ming-Ming and Wei, Yunchao and Xiong, Hong-Kai}, 88 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 89 | pages={2070--2079}, 90 | year={2019} 91 | } 92 | ``` 93 | ``` 94 | @article{jiang2021online, 95 | title={Online Attention Accumulation for Weakly Supervised Semantic Segmentation}, 96 | author={Jiang, Peng-Tao and Han, Ling-Hao and Hou, Qibin and Cheng, Ming-Ming and Wei, Yunchao}, 97 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 98 | year={2021}, 99 | publisher={IEEE} 100 | } 101 | ``` 102 | ## License 103 | The code is released under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License for non-commercial use only. Any commercial use should get formal permission first.
104 | 105 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-10-30 7 | 8 | import random 9 | 10 | import cv2 11 | import numpy as np 12 | import torch 13 | from PIL import Image 14 | from torch.utils import data 15 | 16 | 17 | class _BaseDataset(data.Dataset): 18 | """ 19 | Base dataset class 20 | """ 21 | 22 | def __init__( 23 | self, 24 | root, 25 | split, 26 | ignore_label, 27 | mean_bgr, 28 | augment=True, 29 | base_size=None, 30 | crop_size=321, 31 | scales=(1.0), 32 | flip=True, 33 | ): 34 | self.root = root 35 | self.split = split 36 | self.ignore_label = ignore_label 37 | self.mean_bgr = np.array(mean_bgr) 38 | self.augment = augment 39 | self.base_size = base_size 40 | self.crop_size = crop_size 41 | self.scales = scales 42 | self.flip = flip 43 | self.files = [] 44 | self._set_files() 45 | 46 | cv2.setNumThreads(0) 47 | 48 | def _set_files(self): 49 | """ 50 | Create a file path/image id list. 51 | """ 52 | raise NotImplementedError() 53 | 54 | def _load_data(self, image_id): 55 | """ 56 | Load the image and label in numpy.ndarray 57 | """ 58 | raise NotImplementedError() 59 | 60 | def _augmentation(self, image, label): 61 | # Scaling 62 | h, w = label.shape 63 | if self.base_size: 64 | if h > w: 65 | h, w = (self.base_size, int(self.base_size * w / h)) 66 | else: 67 | h, w = (int(self.base_size * h / w), self.base_size) 68 | scale_factor = random.choice(self.scales) 69 | h, w = (int(h * scale_factor), int(w * scale_factor)) 70 | image = cv2.resize(image, (w, h), interpolation=cv2.INTER_LINEAR) 71 | label = Image.fromarray(label).resize((w, h), resample=Image.NEAREST) 72 | label = np.asarray(label, dtype=np.int64) 73 | 74 | # Padding to fit for crop_size 75 | h, w = label.shape 76 | pad_h = max(self.crop_size - h, 0) 77 | pad_w = max(self.crop_size - w, 0) 78 | pad_kwargs = { 79 | "top": 0, 80 | "bottom": pad_h, 81 | "left": 0, 82 | "right": pad_w, 83 | "borderType": cv2.BORDER_CONSTANT, 84 | } 85 | if pad_h > 0 or pad_w > 0: 86 | image = cv2.copyMakeBorder(image, value=self.mean_bgr, **pad_kwargs) 87 | label = cv2.copyMakeBorder(label, value=self.ignore_label, **pad_kwargs) 88 | 89 | # Cropping 90 | h, w = label.shape 91 | start_h = random.randint(0, h - self.crop_size) 92 | start_w = random.randint(0, w - self.crop_size) 93 | end_h = start_h + self.crop_size 94 | end_w = start_w + self.crop_size 95 | image = image[start_h:end_h, start_w:end_w] 96 | label = label[start_h:end_h, start_w:end_w] 97 | 98 | if self.flip: 99 | # Random flipping 100 | if random.random() < 0.5: 101 | image = np.fliplr(image).copy() # HWC 102 | label = np.fliplr(label).copy() # HW 103 | return image, label 104 | 105 | def __getitem__(self, index): 106 | image_id, image, label = self._load_data(index) 107 | if self.augment: 108 | image, label = self._augmentation(image, label) 109 | # Mean subtraction 110 | image -= self.mean_bgr 111 | # HWC -> CHW 112 | image = image.transpose(2, 0, 1) 113 | return image_id, image.astype(np.float32), label.astype(np.int64) 114 | 115 | def __len__(self): 116 | return len(self.files) 117 | 118 | def __repr__(self): 119 | fmt_str = "Dataset: " + self.__class__.__name__ + "\n" 120 | fmt_str += " # data: {}\n".format(self.__len__()) 121 | fmt_str += " Split: {}\n".format(self.split) 122 
| fmt_str += " Root: {}".format(self.root) 123 | return fmt_str 124 | -------------------------------------------------------------------------------- /scripts/test_iam.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.getcwd()) 4 | 5 | import cv2 6 | import torch 7 | import numpy as np 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import argparse 11 | import torchvision 12 | from torchvision import models, transforms 13 | from torch.utils.data import DataLoader 14 | from utils.LoadData import test_data_loader 15 | from utils.Restore import restore 16 | import matplotlib.pyplot as plt 17 | from models import vgg1 18 | from tqdm import tqdm 19 | import matplotlib as mpl 20 | import matplotlib.pyplot as plt 21 | 22 | colormaps = ['#000000', '#7F0000', '#007F00', '#7F7F00', '#00007F', '#7F007F', '#007F7F', '#7F7F7F', '#3F0000', '#BF0000', '#3F7F00', 23 | '#BF7F00', '#3F007F', '#BF007F', '#3F7F7F', '#BF7F7F', '#003F00', '#7F3F00', '#00BF00', '#7FBF00', '#003F7F'] 24 | 25 | def colormap(index): 26 | return mpl.colors.LinearSegmentedColormap.from_list('cmap', [colormaps[0], colormaps[index+1], '#FFFFFF'], 256) 27 | 28 | def get_arguments(): 29 | parser = argparse.ArgumentParser(description='OAA') 30 | parser.add_argument("--root_dir", type=str, default='./') 31 | parser.add_argument("--save_dir", type=str, default='') 32 | parser.add_argument("--img_dir", type=str, default='') 33 | parser.add_argument("--test_list", type=str, default='') 34 | parser.add_argument("--batch_size", type=int, default=1) 35 | parser.add_argument("--input_size", type=int, default=256) 36 | parser.add_argument("--dataset", type=str, default='imagenet') 37 | parser.add_argument("--num_classes", type=int, default=20) 38 | parser.add_argument("--arch", type=str,default='vgg_v0') 39 | parser.add_argument("--restore_from", type=str, default='') 40 | parser.add_argument("--num_workers", type=int, default=20) 41 | 42 | return parser.parse_args() 43 | 44 | def get_model(args): 45 | model = vgg1.vgg16(num_classes=args.num_classes) 46 | model = torch.nn.DataParallel(model).cuda() 47 | 48 | pretrained_dict = torch.load(args.restore_from)['state_dict'] 49 | model_dict = model.state_dict() 50 | 51 | print(model_dict.keys()) 52 | print(pretrained_dict.keys()) 53 | 54 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} 55 | print("Weights cannot be loaded:") 56 | print([k for k in model_dict.keys() if k not in pretrained_dict.keys()]) 57 | 58 | model_dict.update(pretrained_dict) 59 | model.load_state_dict(model_dict) 60 | 61 | return model 62 | 63 | def validate(args): 64 | print('\nvalidating ... 
', flush=True, end='') 65 | 66 | model = get_model(args) 67 | model.eval() 68 | val_loader = test_data_loader(args) 69 | 70 | if not os.path.exists(args.save_dir): 71 | os.mkdir(args.save_dir) 72 | with torch.no_grad(): 73 | for idx, dat in tqdm(enumerate(val_loader)): 74 | img_name, img, label_in = dat 75 | label = label_in.cuda(non_blocking=True) 76 | logits = model(img) 77 | 78 | cv_im = cv2.imread(img_name[0]) 79 | cv_im_gray = cv2.cvtColor(cv_im, cv2.COLOR_BGR2GRAY) 80 | height, width = cv_im.shape[:2] 81 | 82 | for l, featmap in enumerate(logits): 83 | maps = featmap.cpu().data.numpy() 84 | im_name = args.save_dir + img_name[0].split('/')[-1][:-4] 85 | labels = label_in.long().numpy()[0] 86 | for i in range(int(args.num_classes)): 87 | if labels[i] == 1: 88 | att = maps[i] 89 | att[att < 0] = 0 90 | att = att / (np.max(att) + 1e-8) 91 | att = np.array(att * 255, dtype=np.uint8) 92 | out_name = im_name + '_{}.png'.format(i) 93 | att = cv2.resize(att, (width, height), interpolation=cv2.INTER_CUBIC) 94 | #att = cv_im_gray * 0.2 + att * 0.8 95 | cv2.imwrite(out_name, att) 96 | #plt.imsave(out_name, att, cmap=colormap(i)) 97 | 98 | if __name__ == '__main__': 99 | args = get_arguments() 100 | validate(args) 101 | -------------------------------------------------------------------------------- /scripts/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | import cv2 6 | import torch 7 | import numpy as np 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import argparse 11 | import torchvision 12 | from torchvision import models, transforms 13 | from torch.utils.data import DataLoader 14 | from utils.LoadData import test_data_loader 15 | from utils.Restore import restore 16 | import matplotlib.pyplot as plt 17 | from models import vgg 18 | from tqdm import tqdm 19 | import matplotlib as mpl 20 | import matplotlib.pyplot as plt 21 | 22 | colormaps = ['#000000', '#7F0000', '#007F00', '#7F7F00', '#00007F', '#7F007F', '#007F7F', '#7F7F7F', '#3F0000', '#BF0000', '#3F7F00', 23 | '#BF7F00', '#3F007F', '#BF007F', '#3F7F7F', '#BF7F7F', '#003F00', '#7F3F00', '#00BF00', '#7FBF00', '#003F7F'] 24 | 25 | def colormap(index): 26 | return mpl.colors.LinearSegmentedColormap.from_list('cmap', [colormaps[0], colormaps[index+1], '#FFFFFF'], 256) 27 | 28 | def get_arguments(): 29 | parser = argparse.ArgumentParser(description='ACoL') 30 | parser.add_argument("--root_dir", type=str, default='') 31 | parser.add_argument("--save_dir", type=str, default='') 32 | parser.add_argument("--img_dir", type=str, default='') 33 | parser.add_argument("--test_list", type=str, default='') 34 | parser.add_argument("--batch_size", type=int, default=1) 35 | parser.add_argument("--input_size", type=int, default=256) 36 | parser.add_argument("--dataset", type=str, default='voc2012') 37 | parser.add_argument("--num_classes", type=int, default=20) 38 | parser.add_argument("--num_workers", type=int, default=20) 39 | parser.add_argument("--arch", type=str,default='vgg_v0') 40 | parser.add_argument("--restore_from", type=str, default='') 41 | 42 | return parser.parse_args() 43 | 44 | def get_model(args): 45 | model = vgg.vgg16(num_classes=args.num_classes) 46 | model = torch.nn.DataParallel(model).cuda() 47 | 48 | pretrained_dict = torch.load(args.restore_from)['state_dict'] 49 | model_dict = model.state_dict() 50 | 51 | print(model_dict.keys()) 52 | print(pretrained_dict.keys()) 53 | 54 | pretrained_dict = {k: v 
for k, v in pretrained_dict.items() if k in model_dict.keys()} 55 | print("Weights cannot be loaded:") 56 | print([k for k in model_dict.keys() if k not in pretrained_dict.keys()]) 57 | 58 | model_dict.update(pretrained_dict) 59 | model.load_state_dict(model_dict) 60 | 61 | return model 62 | 63 | def validate(args): 64 | print('\nvalidating ... ', flush=True, end='') 65 | 66 | model = get_model(args) 67 | model.eval() 68 | val_loader = test_data_loader(args) 69 | 70 | if not os.path.exists(args.save_dir): 71 | os.mkdir(args.save_dir) 72 | 73 | with torch.no_grad(): 74 | for idx, dat in tqdm(enumerate(val_loader)): 75 | img_name, img, label_in = dat 76 | label = label_in.cuda(non_blocking=True) 77 | logits = model(img) 78 | last_featmaps = model.module.get_heatmaps() 79 | 80 | cv_im = cv2.imread(img_name[0]) 81 | cv_im_gray = cv2.cvtColor(cv_im, cv2.COLOR_BGR2GRAY) 82 | height, width = cv_im.shape[:2] 83 | 84 | for l, featmap in enumerate(last_featmaps): 85 | maps = featmap.cpu().data.numpy() 86 | im_name = args.save_dir + img_name[0].split('/')[-1][:-4] 87 | labels = label_in.long().numpy()[0] 88 | for i in range(int(args.num_classes)): 89 | if labels[i] == 1: 90 | att = maps[i] 91 | att[att < 0] = 0 92 | att = att / (np.max(att) + 1e-8) 93 | att = np.array(att * 255, dtype=np.uint8) 94 | out_name = im_name + '_{}.png'.format(i) 95 | att = cv2.resize(att, (width, height), interpolation=cv2.INTER_CUBIC) 96 | #att = cv_im_gray * 0.2 + att * 0.8 97 | cv2.imwrite(out_name, att) 98 | #plt.imsave(out_name, att, cmap=colormap(i)) 99 | 100 | if __name__ == '__main__': 101 | args = get_arguments() 102 | validate(args) 103 | -------------------------------------------------------------------------------- /gen_gt.py: -------------------------------------------------------------------------------- 1 | 2 | ####################################################################### 3 | # This file is provided by Peng-Tao Jiang. If you have any questions, # 4 | # please feel free to contact me (pt.jiang@mail.nankai.edu.cn). 
# 5 | ####################################################################### 6 | import cv2 7 | from PIL import Image 8 | import numpy as np 9 | import pydensecrf.densecrf as dcrf 10 | import multiprocessing 11 | import os 12 | from os.path import exists 13 | 14 | palette = [0,0,0, 128,0,0, 0,128,0, 128,128,0, 0,0,128, 128,0,128, 0,128,128, 128,128,128, 15 | 64,0,0, 192,0,0, 64,128,0, 192,128,0, 64,0,128, 192,0,128, 64,128,128, 192,128,128, 16 | 0,64,0, 128,64,0, 0,192,0, 128,192,0, 0,64,128, 128,64,128, 0,192,128, 128,192,128, 17 | 64,64,0, 192,64,0, 64,192,0, 192,192,0] 18 | 19 | cats = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 20 | 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'] 21 | 22 | # set path for data 23 | data_path = '/home/ubuntu/Project/datasets/VOCdevkit/VOC2012/' 24 | train_lst_path = data_path + 'ImageSets/Segmentation/train_cls.txt' 25 | im_path = data_path + 'JPEGImages/' 26 | sal_path = data_path + 'saliency_aug/' 27 | att_path = './runs/exp3/accu_att_zoom/' 28 | save_path = './data/VOCdevkit/VOC2012/proxy-gt/' 29 | 30 | if not exists(save_path): 31 | os.makedirs(save_path) 32 | 33 | with open(train_lst_path) as f: 34 | lines = f.readlines() 35 | 36 | # generate proxy ground-truth 37 | def gen_gt(index): 38 | line = lines[index] 39 | line = line[:-1] 40 | fields = line.split() 41 | name = fields[0] 42 | im_name = im_path + name + '.jpg' 43 | bg_name = sal_path + name + '.png' 44 | #print(bg_name) 45 | if not os.path.exists(bg_name): 46 | return 47 | 48 | sal = cv2.imread(bg_name, 0) 49 | height, width = sal.shape 50 | gt = np.zeros((21, height, width), dtype=np.float32) 51 | sal = np.array(sal, dtype=np.float32) 52 | 53 | # some thresholds. 54 | conflict = 0.9 55 | fg_thr = 0.3 56 | # the below two values are used for generating uncertainty pixels 57 | bg_thr = 32 58 | att_thr = 0.8 59 | 60 | # use saliency map to provide background cues 61 | gt[0] = (1 - (sal / 255)) 62 | init_gt = np.zeros((height, width), dtype=float) 63 | sal_att = sal.copy() 64 | 65 | for i in range(len(fields) - 1): 66 | k = i + 1 67 | cls = int(fields[k]) 68 | att_name = att_path + name + '_' + str(cls) + '.png' 69 | if not exists(att_name): 70 | continue 71 | 72 | # normalize attention to [0, 1] 73 | att = cv2.imread(att_name, 0) 74 | att = (att - np.min(att)) / (np.max(att) - np.min(att) + 1e-8) 75 | gt[cls+1] = att.copy() 76 | sal_att = np.maximum(sal_att, (att > att_thr) *255) 77 | 78 | 79 | # throw low confidence values for all classes 80 | gt[gt < fg_thr] = 0 81 | 82 | # conflict pixels with multiple confidence values 83 | bg = np.array(gt > conflict, dtype=np.uint8) 84 | bg = np.sum(bg, axis=0) 85 | gt = gt.argmax(0).astype(np.uint8) 86 | gt[bg > 1] = 255 87 | 88 | # pixels regarded as background but confidence saliency values 89 | bg = np.array(sal_att >= bg_thr, dtype=np.uint8) * np.array(gt == 0, dtype=np.uint8) 90 | gt[bg > 0] = 255 91 | 92 | # this is an engineering idea, for an image with a small ratio of semantic objects, 93 | # we ignore the whole image, I find that this operation have little impact on 94 | out = gt.copy() 95 | valid = np.array((out > 0) & (out < 255), dtype=int).sum() 96 | ratio = float(valid) / float(height * width) 97 | if ratio < 0.01: 98 | out[...] 
= 255 99 | 100 | # output the proxy labels using the VOC12 label format 101 | out = Image.fromarray(out.astype(np.uint8), mode='P') 102 | #out.putpalette(palette) 103 | out_name = save_path + name + '.png' 104 | out.save(out_name) 105 | 106 | ### Parallel Mode 107 | pool = multiprocessing.Pool(processes=16) 108 | pool.map(gen_gt, range(len(lines))) 109 | # pool.map(gen_gt, range(50)) 110 | pool.close() 111 | pool.join() 112 | 113 | # Loop Mode 114 | #for i in range(len(lines)): 115 | # gen_gt(i) 116 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-11-19 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | from collections import OrderedDict 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | try: 17 | from encoding.nn import SyncBatchNorm 18 | 19 | _BATCH_NORM = SyncBatchNorm 20 | except: 21 | _BATCH_NORM = nn.BatchNorm2d 22 | 23 | _BOTTLENECK_EXPANSION = 4 24 | 25 | 26 | class _ConvBnReLU(nn.Sequential): 27 | """ 28 | Cascade of 2D convolution, batch norm, and ReLU. 29 | """ 30 | 31 | BATCH_NORM = _BATCH_NORM 32 | 33 | def __init__( 34 | self, in_ch, out_ch, kernel_size, stride, padding, dilation, relu=True 35 | ): 36 | super(_ConvBnReLU, self).__init__() 37 | self.add_module( 38 | "conv", 39 | nn.Conv2d( 40 | in_ch, out_ch, kernel_size, stride, padding, dilation, bias=False 41 | ), 42 | ) 43 | self.add_module("bn", _BATCH_NORM(out_ch, eps=1e-5, momentum=1 - 0.999)) 44 | 45 | if relu: 46 | self.add_module("relu", nn.ReLU()) 47 | 48 | 49 | class _Bottleneck(nn.Module): 50 | """ 51 | Bottleneck block of MSRA ResNet. 52 | """ 53 | 54 | def __init__(self, in_ch, out_ch, stride, dilation, downsample): 55 | super(_Bottleneck, self).__init__() 56 | mid_ch = out_ch // _BOTTLENECK_EXPANSION 57 | self.reduce = _ConvBnReLU(in_ch, mid_ch, 1, stride, 0, 1, True) 58 | self.conv3x3 = _ConvBnReLU(mid_ch, mid_ch, 3, 1, dilation, dilation, True) 59 | self.increase = _ConvBnReLU(mid_ch, out_ch, 1, 1, 0, 1, False) 60 | self.shortcut = ( 61 | _ConvBnReLU(in_ch, out_ch, 1, stride, 0, 1, False) 62 | if downsample 63 | else nn.Identity() 64 | ) 65 | 66 | def forward(self, x): 67 | h = self.reduce(x) 68 | h = self.conv3x3(h) 69 | h = self.increase(h) 70 | h += self.shortcut(x) 71 | return F.relu(h) 72 | 73 | 74 | class _ResLayer(nn.Sequential): 75 | """ 76 | Residual layer with multi grids 77 | """ 78 | 79 | def __init__(self, n_layers, in_ch, out_ch, stride, dilation, multi_grids=None): 80 | super(_ResLayer, self).__init__() 81 | 82 | if multi_grids is None: 83 | multi_grids = [1 for _ in range(n_layers)] 84 | else: 85 | assert n_layers == len(multi_grids) 86 | 87 | # Downsampling is only in the first block 88 | for i in range(n_layers): 89 | self.add_module( 90 | "block{}".format(i + 1), 91 | _Bottleneck( 92 | in_ch=(in_ch if i == 0 else out_ch), 93 | out_ch=out_ch, 94 | stride=(stride if i == 0 else 1), 95 | dilation=dilation * multi_grids[i], 96 | downsample=(True if i == 0 else False), 97 | ), 98 | ) 99 | 100 | 101 | class _Stem(nn.Sequential): 102 | """ 103 | The 1st conv layer. 104 | Note that the max pooling is different from both MSRA and FAIR ResNet. 
105 | """ 106 | 107 | def __init__(self, out_ch): 108 | super(_Stem, self).__init__() 109 | self.add_module("conv1", _ConvBnReLU(3, out_ch, 7, 2, 3, 1)) 110 | self.add_module("pool", nn.MaxPool2d(3, 2, 1, ceil_mode=True)) 111 | 112 | 113 | class ResNet(nn.Sequential): 114 | def __init__(self, n_classes, n_blocks): 115 | super(ResNet, self).__init__() 116 | ch = [64 * 2 ** p for p in range(6)] 117 | self.add_module("layer1", _Stem(ch[0])) 118 | self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], 1, 1)) 119 | self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], 2, 1)) 120 | self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], 2, 1)) 121 | self.add_module("layer5", _ResLayer(n_blocks[3], ch[4], ch[5], 2, 1)) 122 | self.add_module("pool5", nn.AdaptiveAvgPool2d(1)) 123 | self.add_module("flatten", nn.Flatten()) 124 | self.add_module("fc", nn.Linear(ch[5], n_classes)) 125 | 126 | 127 | if __name__ == "__main__": 128 | model = ResNet(n_classes=1000, n_blocks=[3, 4, 23, 3]) 129 | model.eval() 130 | image = torch.randn(1, 3, 224, 224) 131 | 132 | print(model) 133 | print("input:", image.shape) 134 | print("output:", model(image).shape) 135 | -------------------------------------------------------------------------------- /scripts/my_optim.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | from torch.optim.lr_scheduler import LambdaLR 3 | import numpy as np 4 | 5 | #def get_finetune_optimizer(args, model): 6 | # lr = args.lr 7 | # weight_list = [] 8 | # bias_list = [] 9 | # last_weight_list = [] 10 | # last_bias_list =[] 11 | # for name, value in model.named_parameters(): 12 | # # if 'features' in name: 13 | # # value.requires_grad = False 14 | # if 'cls' in name: 15 | # if 'weight' in name: 16 | # last_weight_list.append(value) 17 | # elif 'bias' in name: 18 | # last_bias_list.append(value) 19 | # else: 20 | # if 'weight' in name: 21 | # weight_list.append(value) 22 | # elif 'bias' in name: 23 | # bias_list.append(value) 24 | # 25 | # opt = optim.SGD([{'params': weight_list, 'lr':lr}, 26 | # {'params':bias_list, 'lr':lr*2}, 27 | # {'params':last_weight_list, 'lr':lr*10}, 28 | # {'params': last_bias_list, 'lr':lr*20}], momentum=0.9, weight_decay=args.weight_decay, nesterov=True) 29 | # 30 | # return opt 31 | 32 | class PolyOptimizer(optim.SGD): 33 | 34 | def __init__(self, params, lr, weight_decay, max_step, momentum=0.9): 35 | super().__init__(params, lr, weight_decay) 36 | self.param_groups = params 37 | self.global_step = 0 38 | self.max_step = max_step 39 | self.momentum = momentum 40 | 41 | self.__initial_lr = [group['lr'] for group in self.param_groups] 42 | 43 | 44 | def step(self, closure=None): 45 | 46 | if self.global_step < self.max_step: 47 | lr_mult = (1 - self.global_step / self.max_step) ** self.momentum 48 | 49 | for i in range(len(self.param_groups)): 50 | self.param_groups[i]['lr'] = self.__initial_lr[i] * lr_mult 51 | super().step(closure) 52 | 53 | self.global_step += 1 54 | 55 | 56 | def lr_poly(base_lr, iter,max_iter,power=0.9): 57 | return base_lr*((1-float(iter)/max_iter)**(power)) 58 | 59 | def reduce_lr_poly(args, optimizer, global_iter, max_iter): 60 | base_lr = args.lr 61 | for g in optimizer.param_groups: 62 | g['lr'] = lr_poly(base_lr=base_lr, iter=global_iter, max_iter=max_iter, power=0.9) 63 | 64 | def get_optimizer(args, model): 65 | lr = args.lr 66 | # opt = optim.SGD(params=model.parameters(), lr=lr, momentum=0.9, weight_decay=0.0001) 67 | opt = 
optim.SGD(params=[para for name, para in model.named_parameters() if 'features' not in name], lr=lr, momentum=0.9, weight_decay=0.0005) 68 | # lambda1 = lambda epoch: 0.1 if epoch in [85, 125, 165] else 1.0 69 | # scheduler = LambdaLR(opt, lr_lambda=lambda1) 70 | 71 | return opt 72 | 73 | def get_adam(args, model): 74 | lr = args.lr 75 | opt = optim.Adam(params=model.parameters(), lr =lr, weight_decay=0.0005) 76 | # opt = optim.Adam(params=model.parameters(), lr =lr) 77 | 78 | return opt 79 | 80 | def reduce_lr(args, optimizer, epoch, factor=0.1): 81 | # if 'coco' in args.dataset: 82 | # change_points = [1,2,3,4,5] 83 | # elif 'imagenet' in args.dataset: 84 | # change_points = [1,2,3,4,5,6,7,8,9,10,11,12] 85 | # else: 86 | # change_points = None 87 | 88 | values = args.decay_points.strip().split(',') 89 | try: 90 | change_points = map(lambda x: int(x.strip()), values) 91 | except ValueError: 92 | change_points = None 93 | 94 | if change_points is not None and epoch in change_points: 95 | for g in optimizer.param_groups: 96 | g['lr'] = g['lr']*factor 97 | print(epoch, g['lr']) 98 | return True 99 | 100 | def adjust_lr(args, optimizer, epoch): 101 | if 'cifar' in args.dataset: 102 | change_points = [80, 120, 160] 103 | elif 'indoor' in args.dataset: 104 | change_points = [60, 80, 100] 105 | elif 'dog' in args.dataset: 106 | change_points = [60, 80, 100] 107 | elif 'voc' in args.dataset: 108 | change_points = [30, 40] 109 | else: 110 | change_points = None 111 | # else: 112 | 113 | # if epoch in change_points: 114 | # lr = args.lr * 0.1**(change_points.index(epoch)+1) 115 | # else: 116 | # lr = args.lr 117 | 118 | if change_points is not None: 119 | change_points = np.array(change_points) 120 | pos = np.sum(epoch > change_points) 121 | lr = args.lr * (0.1**pos) 122 | else: 123 | lr = args.lr 124 | 125 | for param_group in optimizer.param_groups: 126 | param_group['lr'] = lr 127 | -------------------------------------------------------------------------------- /utils/pyutils.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import time 4 | import sys 5 | 6 | class Logger(object): 7 | def __init__(self, outfile): 8 | self.terminal = sys.stdout 9 | self.log = open(outfile, "w") 10 | sys.stdout = self 11 | 12 | def write(self, message): 13 | self.terminal.write(message) 14 | self.log.write(message) 15 | 16 | def flush(self): 17 | self.terminal.flush() 18 | 19 | 20 | class AverageMeter: 21 | def __init__(self, *keys): 22 | self.__data = dict() 23 | for k in keys: 24 | self.__data[k] = [0.0, 0] 25 | 26 | def add(self, dict): 27 | for k, v in dict.items(): 28 | self.__data[k][0] += v 29 | self.__data[k][1] += 1 30 | 31 | def get(self, *keys): 32 | if len(keys) == 1: 33 | return self.__data[keys[0]][0] / self.__data[keys[0]][1] 34 | else: 35 | v_list = [self.__data[k][0] / self.__data[k][1] for k in keys] 36 | return tuple(v_list) 37 | 38 | def pop(self, key=None): 39 | if key is None: 40 | for k in self.__data.keys(): 41 | self.__data[k] = [0.0, 0] 42 | else: 43 | v = self.get(key) 44 | self.__data[key] = [0.0, 0] 45 | return v 46 | 47 | 48 | class Timer: 49 | def __init__(self, starting_msg = None): 50 | self.start = time.time() 51 | self.stage_start = self.start 52 | 53 | if starting_msg is not None: 54 | print(starting_msg, time.ctime(time.time())) 55 | 56 | 57 | def update_progress(self, progress): 58 | self.elapsed = time.time() - self.start 59 | self.est_total = self.elapsed / progress 60 | self.est_remaining = self.est_total - 
self.elapsed 61 | self.est_finish = int(self.start + self.est_total) 62 | 63 | 64 | def str_est_finish(self): 65 | return str(time.ctime(self.est_finish)) 66 | 67 | def get_stage_elapsed(self): 68 | return time.time() - self.stage_start 69 | 70 | def reset_stage(self): 71 | self.stage_start = time.time() 72 | 73 | 74 | from multiprocessing.pool import ThreadPool 75 | 76 | class BatchThreader: 77 | 78 | def __init__(self, func, args_list, batch_size, prefetch_size=4, processes=12): 79 | self.batch_size = batch_size 80 | self.prefetch_size = prefetch_size 81 | 82 | self.pool = ThreadPool(processes=processes) 83 | self.async_result = [] 84 | 85 | self.func = func 86 | self.left_args_list = args_list 87 | self.n_tasks = len(args_list) 88 | 89 | # initial work 90 | self.__start_works(self.__get_n_pending_works()) 91 | 92 | 93 | def __start_works(self, times): 94 | for _ in range(times): 95 | args = self.left_args_list.pop(0) 96 | self.async_result.append( 97 | self.pool.apply_async(self.func, args)) 98 | 99 | 100 | def __get_n_pending_works(self): 101 | return min((self.prefetch_size + 1) * self.batch_size - len(self.async_result) 102 | , len(self.left_args_list)) 103 | 104 | 105 | 106 | def pop_results(self): 107 | 108 | n_inwork = len(self.async_result) 109 | 110 | n_fetch = min(n_inwork, self.batch_size) 111 | rtn = [self.async_result.pop(0).get() 112 | for _ in range(n_fetch)] 113 | 114 | to_fill = self.__get_n_pending_works() 115 | if to_fill == 0: 116 | self.pool.close() 117 | else: 118 | self.__start_works(to_fill) 119 | 120 | return rtn 121 | 122 | 123 | 124 | 125 | def get_indices_of_pairs(radius, size): 126 | 127 | search_dist = [] 128 | 129 | for x in range(1, radius): 130 | search_dist.append((0, x)) 131 | 132 | for y in range(1, radius): 133 | for x in range(-radius + 1, radius): 134 | if x * x + y * y < radius * radius: 135 | search_dist.append((y, x)) 136 | 137 | radius_floor = radius - 1 138 | 139 | full_indices = np.reshape(np.arange(0, size[0]*size[1], dtype=np.int64), 140 | (size[0], size[1])) 141 | 142 | cropped_height = size[0] - radius_floor 143 | cropped_width = size[1] - 2 * radius_floor 144 | 145 | indices_from = np.reshape(full_indices[:-radius_floor, radius_floor:-radius_floor], 146 | [-1]) 147 | 148 | indices_to_list = [] 149 | 150 | for dy, dx in search_dist: 151 | indices_to = full_indices[dy:dy + cropped_height, 152 | radius_floor + dx:radius_floor + dx + cropped_width] 153 | indices_to = np.reshape(indices_to, [-1]) 154 | 155 | indices_to_list.append(indices_to) 156 | 157 | concat_indices_to = np.concatenate(indices_to_list, axis=0) 158 | 159 | return indices_from, concat_indices_to 160 | 161 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/voc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 08 February 2019 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | import os.path as osp 11 | 12 | import cv2 13 | import numpy as np 14 | import torch 15 | from PIL import Image 16 | from torch.utils import data 17 | 18 | from .base import _BaseDataset 19 | 20 | 21 | class VOC(_BaseDataset): 22 | """ 23 | PASCAL VOC Segmentation dataset 24 | """ 25 | 26 | def __init__(self, year=2012, **kwargs): 27 | self.year = year 28 | super(VOC, self).__init__(**kwargs) 29 | 30 | def _set_files(self): 31 | self.root = 
osp.join(self.root, "VOC{}".format(self.year)) 32 | self.image_dir = osp.join(self.root, "JPEGImages") 33 | self.label_dir = osp.join(self.root, "SegmentationClass") 34 | 35 | if self.split in ["train", "trainval", "val", "test"]: 36 | file_list = osp.join( 37 | self.root, "ImageSets/Segmentation", self.split + ".txt" 38 | ) 39 | file_list = tuple(open(file_list, "r")) 40 | file_list = [id_.rstrip() for id_ in file_list] 41 | self.files = file_list 42 | else: 43 | raise ValueError("Invalid split name: {}".format(self.split)) 44 | 45 | def _load_data(self, index): 46 | # Set paths 47 | image_id = self.files[index] 48 | image_path = osp.join(self.image_dir, image_id + ".jpg") 49 | label_path = osp.join(self.label_dir, image_id + ".png") 50 | # Load an image 51 | image = cv2.imread(image_path, cv2.IMREAD_COLOR).astype(np.float32) 52 | label = np.asarray(Image.open(label_path), dtype=np.int32) 53 | return image_id, image, label 54 | 55 | 56 | class VOCAug(_BaseDataset): 57 | """ 58 | PASCAL VOC Segmentation dataset with extra annotations 59 | """ 60 | 61 | def __init__(self, year=2012, **kwargs): 62 | self.year = year 63 | super(VOCAug, self).__init__(**kwargs) 64 | 65 | def _set_files(self): 66 | #self.root = osp.join(self.root, "VOC{}".format(self.year)) 67 | 68 | if self.split in ["train", "train_aug", "val"]: 69 | file_list = osp.join( 70 | "./data/datasets/voc12/", self.split + ".txt" 71 | ) 72 | file_list = tuple(open(file_list, "r")) 73 | file_list = [id_.rstrip().split(" ") for id_ in file_list] 74 | self.files, self.labels = list(zip(*file_list)) 75 | else: 76 | raise ValueError("Invalid split name: {}".format(self.split)) 77 | 78 | def _load_data(self, index): 79 | # Set paths 80 | image_id = self.files[index].split("/")[-1].split(".")[0] 81 | image_path = osp.join(self.root, self.files[index][1:]) 82 | label_path = osp.join(self.root, self.labels[index][1:]) 83 | # Load an image 84 | image = cv2.imread(image_path, cv2.IMREAD_COLOR).astype(np.float32) 85 | label = np.asarray(Image.open(label_path), dtype=np.int32) 86 | return image_id, image, label 87 | 88 | 89 | if __name__ == "__main__": 90 | import matplotlib 91 | import matplotlib.pyplot as plt 92 | import matplotlib.cm as cm 93 | import torchvision 94 | import yaml 95 | from torchvision.utils import make_grid 96 | from tqdm import tqdm 97 | 98 | kwargs = {"nrow": 10, "padding": 50} 99 | batch_size = 100 100 | 101 | dataset = VOCAug( 102 | root="/media/kazuto1011/Extra/VOCdevkit", 103 | split="train_aug", 104 | ignore_label=255, 105 | mean_bgr=(104.008, 116.669, 122.675), 106 | year=2012, 107 | augment=True, 108 | base_size=None, 109 | crop_size=513, 110 | scales=(0.5, 0.75, 1.0, 1.25, 1.5), 111 | flip=True, 112 | ) 113 | print(dataset) 114 | 115 | loader = data.DataLoader(dataset, batch_size=batch_size) 116 | 117 | for i, (image_ids, images, labels) in tqdm( 118 | enumerate(loader), total=np.ceil(len(dataset) / batch_size), leave=False 119 | ): 120 | if i == 0: 121 | mean = torch.tensor((104.008, 116.669, 122.675))[None, :, None, None] 122 | images += mean.expand_as(images) 123 | image = make_grid(images, pad_value=-1, **kwargs).numpy() 124 | image = np.transpose(image, (1, 2, 0)) 125 | mask = np.zeros(image.shape[:2]) 126 | mask[(image != -1)[..., 0]] = 255 127 | image = np.dstack((image, mask)).astype(np.uint8) 128 | 129 | labels = labels[:, np.newaxis, ...] 
130 | label = make_grid(labels, pad_value=255, **kwargs).numpy() 131 | label_ = np.transpose(label, (1, 2, 0))[..., 0].astype(np.float32) 132 | label = cm.jet_r(label_ / 21.0) * 255 133 | mask = np.zeros(label.shape[:2]) 134 | label[..., 3][(label_ == 255)] = 0 135 | label = label.astype(np.uint8) 136 | 137 | tiled_images = np.hstack((image, label)) 138 | # cv2.imwrite("./docs/datasets/voc12.png", tiled_images) 139 | plt.imshow(np.dstack((tiled_images[..., 2::-1], tiled_images[..., 3]))) 140 | plt.show() 141 | break 142 | -------------------------------------------------------------------------------- /utils/Restore.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | __all__ = ['restore'] 5 | 6 | def restore(args, model, optimizer, istrain=True, including_opt=False): 7 | if os.path.isfile(args.restore_from) and ('.pth' in args.restore_from): 8 | snapshot = args.restore_from 9 | else: 10 | restore_dir = args.snapshot_dir 11 | filelist = os.listdir(restore_dir) 12 | filelist = [x for x in filelist if os.path.isfile(os.path.join(restore_dir,x)) and x.endswith('.pth.tar')] 13 | if len(filelist) > 0: 14 | filelist.sort(key=lambda fn:os.path.getmtime(os.path.join(restore_dir, fn)), reverse=True) 15 | snapshot = os.path.join(restore_dir, filelist[0]) 16 | else: 17 | snapshot = '' 18 | 19 | if os.path.isfile(snapshot): 20 | print("=> loading checkpoint '{}'".format(snapshot)) 21 | checkpoint = torch.load(snapshot) 22 | _model_load(model, checkpoint) 23 | #try: 24 | # if istrain: 25 | # args.current_epoch = checkpoint['epoch'] + 1 26 | # args.global_counter = checkpoint['global_counter'] + 1 27 | # if including_opt: 28 | # optimizer.load_state_dict(checkpoint['optimizer']) 29 | # model.load_state_dict(checkpoint['state_dict']) 30 | # print("=> loaded checkpoint '{}' (epoch {})".format(snapshot, checkpoint['epoch'])) 31 | #except KeyError: 32 | # print("KeyError") 33 | # if args.arch=='vgg_v5_7' or args.arch=='vgg_v7' or args.arch=='vgg_v10': 34 | # _model_load_v6(model, checkpoint) 35 | # # elif args.arch=='vgg_v2': 36 | # # _model_load_v2(model, checkpoint) 37 | # else: 38 | # _model_load(model, checkpoint) 39 | #except KeyError: 40 | # print("Loading pre-trained values failed.") 41 | # raise 42 | print("=> loaded checkpoint '{}'".format(snapshot)) 43 | else: 44 | print("=> no checkpoint found at '{}'".format(snapshot)) 45 | 46 | 47 | def _model_load(model, pretrained_dict): 48 | model_dict = model.state_dict() 49 | 50 | # model_dict_keys = [v.replace('module.', '') for v in model_dict.keys() if v.startswith('module.')] 51 | if list(model_dict.keys())[0].startswith('module.'): 52 | pretrained_dict = {'module.'+k: v for k, v in pretrained_dict.items()} 53 | 54 | # print pretrained_dict.keys() 55 | # print model.state_dict().keys() 56 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} 57 | print("Weights cannot be loaded:") 58 | print([k for k in model_dict.keys() if k not in pretrained_dict.keys()]) 59 | 60 | model_dict.update(pretrained_dict) 61 | model.load_state_dict(model_dict) 62 | 63 | def _model_load_v6(model, pretrained_dict): 64 | model_dict = model.state_dict() 65 | 66 | # model_dict_keys = [v.replace('module.', '') for v in model_dict.keys() if v.startswith('module.')] 67 | if model_dict.keys()[0].startswith('module.'): 68 | pretrained_dict = {'module.'+k: v for k, v in pretrained_dict.items()} 69 | 70 | 71 | feature2_pred_w = 
{'module.fc5_seg.%d.weight'%(i):'module.features.%d.weight'%(i+24) for i in range(0,5,2)} 72 | feature2_pred_b = {'module.fc5_seg.%d.bias'%(i):'module.features.%d.bias'%(i+24) for i in range(0,5,2)} 73 | # feature_erase_pred_w = {'module.fc5_seg.%d.weight'%(i):'module.features.%d.weight'%(i+24) for i in range(0,5,2)} 74 | # feature_erase_pred_b = {'module.fc5_seg.%d.bias'%(i):'module.features.%d.bias'%(i+24) for i in range(0,5,2)} 75 | 76 | common_pred = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} 77 | print("Weights cannot be loaded:") 78 | print([k for k in model_dict.keys() if k not in common_pred.keys()+ feature2_pred_w.keys() + feature2_pred_b.keys()]) 79 | 80 | def update_coord_dict(d): 81 | for k in d.keys(): 82 | model_dict[k] = pretrained_dict[d[k]] 83 | 84 | update_coord_dict(feature2_pred_w) 85 | update_coord_dict(feature2_pred_b) 86 | # update_coord_dict(feature_erase_pred_w) 87 | # update_coord_dict(feature_erase_pred_b) 88 | 89 | 90 | model_dict.update(common_pred) 91 | model.load_state_dict(model_dict) 92 | 93 | def _model_load_v2(model, pretrained_dict): 94 | model_dict = model.state_dict() 95 | 96 | # model_dict_keys = [v.replace('module.', '') for v in model_dict.keys() if v.startswith('module.')] 97 | if model_dict.keys()[0].startswith('module.'): 98 | pretrained_dict = {'module.'+k: v for k, v in pretrained_dict.items()} 99 | 100 | 101 | fc5_cls_w = {'module.fc5_cls.%d.weight'%(i):'module.features.%d.weight'%(i+24) for i in range(0,5,2)} 102 | fc5_cls_b = {'module.fc5_cls.%d.bias'%(i):'module.features.%d.bias'%(i+24) for i in range(0,5,2)} 103 | fc5_seg_w = {'module.fc5_seg.%d.weight'%(i):'module.features.%d.weight'%(i+24) for i in range(0,5,2)} 104 | fc5_seg_b = {'module.fc5_seg.%d.bias'%(i):'module.features.%d.bias'%(i+24) for i in range(0,5,2)} 105 | 106 | common_pred = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} 107 | print("Weights cannot be loaded:") 108 | print([k for k in model_dict.keys() if k not in common_pred.keys()+fc5_cls_w.keys()+ 109 | fc5_cls_b.keys() + fc5_seg_w.keys() + fc5_seg_b.keys()]) 110 | 111 | def update_coord_dict(d): 112 | for k in d.keys(): 113 | model_dict[k] = pretrained_dict[d[k]] 114 | 115 | update_coord_dict(fc5_cls_w) 116 | update_coord_dict(fc5_cls_b) 117 | update_coord_dict(fc5_seg_w) 118 | update_coord_dict(fc5_seg_b) 119 | 120 | 121 | model_dict.update(common_pred) 122 | model.load_state_dict(model_dict) 123 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/cocostuff/cocostuff_hierarchy.yaml: -------------------------------------------------------------------------------- 1 | things: 2 | indoor-super-things: 3 | appliance-things: 4 | - microwave 5 | - oven 6 | - toaster 7 | - sink 8 | - refrigerator 9 | - blender 10 | electronic-things: 11 | - tv 12 | - laptop 13 | - mouse 14 | - remote 15 | - keyboard 16 | - cell phone 17 | food-things: 18 | - banana 19 | - apple 20 | - sandwich 21 | - orange 22 | - broccoli 23 | - carrot 24 | - hot dog 25 | - pizza 26 | - donut 27 | - cake 28 | furniture-things: 29 | - chair 30 | - couch 31 | - potted plant 32 | - bed 33 | - mirror 34 | - dining table 35 | - window 36 | - desk 37 | - toilet 38 | - door 39 | indoor-things: 40 | - book 41 | - clock 42 | - vase 43 | - scissors 44 | - teddy bear 45 | - hair drier 46 | - toothbrush 47 | - hair brush 48 | kitchen-things: 49 | - bottle 50 | - plate 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | 
outdoor-super-things: 58 | accessory-things: 59 | - hat 60 | - backpack 61 | - umbrella 62 | - shoe 63 | - eye glasses 64 | - handbag 65 | - tie 66 | - suitcase 67 | animal-things: 68 | - bird 69 | - cat 70 | - dog 71 | - horse 72 | - sheep 73 | - cow 74 | - elephant 75 | - bear 76 | - zebra 77 | - giraffe 78 | outdoor-things: 79 | - traffic light 80 | - fire hydrant 81 | - street sign 82 | - stop sign 83 | - parking meter 84 | - bench 85 | person-things: 86 | - person 87 | sports-things: 88 | - frisbee 89 | - skis 90 | - snowboard 91 | - sports ball 92 | - kite 93 | - baseball bat 94 | - baseball glove 95 | - skateboard 96 | - surfboard 97 | - tennis racket 98 | vehicle-things: 99 | - bicycle 100 | - car 101 | - motorcycle 102 | - airplane 103 | - bus 104 | - train 105 | - truck 106 | - boat 107 | stuff: 108 | indoor-super-stuff: 109 | ceiling-stuff: 110 | - ceiling-tile 111 | - ceiling-other 112 | floor-stuff: 113 | - carpet 114 | - floor-tile 115 | - floor-wood 116 | - floor-marble 117 | - floor-stone 118 | - floor-other 119 | food-stuff: 120 | - fruit 121 | - salad 122 | - vegetable 123 | - food-other 124 | furniture-stuff: 125 | - door-stuff 126 | - desk-stuff 127 | - table 128 | - shelf 129 | - cabinet 130 | - cupboard 131 | - mirror-stuff 132 | - counter 133 | - light 134 | - stairs 135 | - furniture-other 136 | rawmaterial-stuff: 137 | - cardboard 138 | - paper 139 | - plastic 140 | - metal 141 | textile-stuff: 142 | - rug 143 | - mat 144 | - towel 145 | - napkin 146 | - clothes 147 | - cloth 148 | - curtain 149 | - blanket 150 | - pillow 151 | - banner 152 | - textile-other 153 | wall-stuff: 154 | - wall-tile 155 | - wall-panel 156 | - wall-wood 157 | - wall-brick 158 | - wall-stone 159 | - wall-concrete 160 | - wall-other 161 | window-stuff: 162 | - window-blind 163 | - window-other 164 | outdoor-super-stuff: 165 | building-stuff: 166 | - house 167 | - skyscraper 168 | - bridge 169 | - tent 170 | - roof 171 | - building-other 172 | ground-stuff: 173 | - sand 174 | - snow 175 | - dirt 176 | - mud 177 | - gravel 178 | - road 179 | - pavement 180 | - railroad 181 | - platform 182 | - playingfield 183 | - ground-other 184 | plant-stuff: 185 | - grass 186 | - tree 187 | - bush 188 | - leaves 189 | - flower 190 | - branch 191 | - moss 192 | - straw 193 | - plant-other 194 | sky-stuff: 195 | - clouds 196 | - sky-other 197 | solid-stuff: 198 | - wood 199 | - rock 200 | - stone 201 | - mountain 202 | - hill 203 | - solid-other 204 | structural-stuff: 205 | - fence 206 | - cage 207 | - net 208 | - railing 209 | - structural-other 210 | water-stuff: 211 | - fog 212 | - river 213 | - sea 214 | - waterdrops 215 | - water-other -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/cocostuff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-10-30 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | import os.path as osp 11 | from glob import glob 12 | 13 | import cv2 14 | import numpy as np 15 | import scipy.io as sio 16 | import torch 17 | from PIL import Image 18 | from torch.utils import data 19 | 20 | from .base import _BaseDataset 21 | 22 | 23 | class CocoStuff10k(_BaseDataset): 24 | """COCO-Stuff 10k dataset""" 25 | 26 | def __init__(self, warp_image=True, **kwargs): 27 | self.warp_image = warp_image 28 | super(CocoStuff10k, 
self).__init__(**kwargs) 29 | 30 | def _set_files(self): 31 | # Create data list via {train, test, all}.txt 32 | if self.split in ["train", "test", "all"]: 33 | file_list = osp.join(self.root, "imageLists", self.split + ".txt") 34 | file_list = tuple(open(file_list, "r")) 35 | file_list = [id_.rstrip() for id_ in file_list] 36 | self.files = file_list 37 | else: 38 | raise ValueError("Invalid split name: {}".format(self.split)) 39 | 40 | def _load_data(self, index): 41 | # Set paths 42 | image_id = self.files[index] 43 | image_path = osp.join(self.root, "images", image_id + ".jpg") 44 | label_path = osp.join(self.root, "annotations", image_id + ".mat") 45 | # Load an image and label 46 | image = cv2.imread(image_path, cv2.IMREAD_COLOR).astype(np.float32) 47 | label = sio.loadmat(label_path)["S"] 48 | label -= 1 # unlabeled (0 -> -1) 49 | label[label == -1] = 255 50 | # Warping: this is just for reproducing the official scores on GitHub 51 | if self.warp_image: 52 | image = cv2.resize(image, (513, 513), interpolation=cv2.INTER_LINEAR) 53 | label = Image.fromarray(label).resize((513, 513), resample=Image.NEAREST) 54 | label = np.asarray(label) 55 | return image_id, image, label 56 | 57 | 58 | class CocoStuff164k(_BaseDataset): 59 | """COCO-Stuff 164k dataset""" 60 | 61 | def __init__(self, **kwargs): 62 | super(CocoStuff164k, self).__init__(**kwargs) 63 | 64 | def _set_files(self): 65 | # Create data list by parsing the "images" folder 66 | if self.split in ["train2017", "val2017"]: 67 | file_list = sorted(glob(osp.join(self.root, "images", self.split, "*.jpg"))) 68 | assert len(file_list) > 0, "{} has no image".format( 69 | osp.join(self.root, "images", self.split) 70 | ) 71 | file_list = [f.split("/")[-1].replace(".jpg", "") for f in file_list] 72 | self.files = file_list 73 | else: 74 | raise ValueError("Invalid split name: {}".format(self.split)) 75 | 76 | def _load_data(self, index): 77 | # Set paths 78 | image_id = self.files[index] 79 | image_path = osp.join(self.root, "images", self.split, image_id + ".jpg") 80 | label_path = osp.join(self.root, "annotations", self.split, image_id + ".png") 81 | # Load an image and label 82 | image = cv2.imread(image_path, cv2.IMREAD_COLOR).astype(np.float32) 83 | label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE) 84 | return image_id, image, label 85 | 86 | 87 | def get_parent_class(value, dictionary): 88 | # Get parent class with COCO-Stuff hierarchy 89 | for k, v in dictionary.items(): 90 | if isinstance(v, list): 91 | if value in v: 92 | yield k 93 | elif isinstance(v, dict): 94 | if value in list(v.keys()): 95 | yield k 96 | else: 97 | for res in get_parent_class(value, v): 98 | yield res 99 | 100 | 101 | if __name__ == "__main__": 102 | import matplotlib 103 | import matplotlib.pyplot as plt 104 | import matplotlib.cm as cm 105 | import torchvision 106 | import yaml 107 | from torchvision.utils import make_grid 108 | from tqdm import tqdm 109 | 110 | kwargs = {"nrow": 10, "padding": 50} 111 | batch_size = 100 112 | 113 | dataset = CocoStuff164k( 114 | root="/media/kazuto1011/Extra/cocostuff/cocostuff-164k", 115 | split="train2017", 116 | ignore_label=255, 117 | mean_bgr=(104.008, 116.669, 122.675), 118 | augment=True, 119 | crop_size=321, 120 | scales=(0.5, 0.75, 1.0, 1.25, 1.5), 121 | flip=True, 122 | ) 123 | print(dataset) 124 | 125 | loader = data.DataLoader(dataset, batch_size=batch_size) 126 | 127 | for i, (image_ids, images, labels) in tqdm( 128 | enumerate(loader), total=np.ceil(len(dataset) / batch_size), leave=False 129 | ): 130 | if 
i == 0: 131 | mean = torch.tensor((104.008, 116.669, 122.675))[None, :, None, None] 132 | images += mean.expand_as(images) 133 | image = make_grid(images, pad_value=-1, **kwargs).numpy() 134 | image = np.transpose(image, (1, 2, 0)) 135 | mask = np.zeros(image.shape[:2]) 136 | mask[(image != -1)[..., 0]] = 255 137 | image = np.dstack((image, mask)).astype(np.uint8) 138 | 139 | labels = labels[:, np.newaxis, ...] 140 | label = make_grid(labels, pad_value=255, **kwargs).numpy() 141 | label_ = np.transpose(label, (1, 2, 0))[..., 0].astype(np.float32) 142 | label = cm.jet_r(label_ / 182.0) * 255 143 | mask = np.zeros(label.shape[:2]) 144 | label[..., 3][(label_ == 255)] = 0 145 | label = label.astype(np.uint8) 146 | 147 | tiled_images = np.hstack((image, label)) 148 | # cv2.imwrite("./docs/datasets/cocostuff.png", tiled_images) 149 | plt.imshow(np.dstack((tiled_images[..., 2::-1], tiled_images[..., 3]))) 150 | plt.show() 151 | break 152 | 153 | class_hierarchy = "./data/datasets/cocostuff/cocostuff_hierarchy.yaml" 154 | data = yaml.load(open(class_hierarchy)) 155 | key = "person" 156 | 157 | for _ in range(3): 158 | key = get_parent_class(key, data) 159 | key = list(key)[0] 160 | print(key) 161 | -------------------------------------------------------------------------------- /utils/torchutils.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from torch.utils.data import Dataset 4 | from PIL import Image 5 | import os.path 6 | import random 7 | import numpy as np 8 | from tool import imutils 9 | 10 | class PolyOptimizer(torch.optim.SGD): 11 | 12 | def __init__(self, params, lr, weight_decay, max_step, momentum=0.9): 13 | super().__init__(params, lr, weight_decay) 14 | 15 | self.global_step = 0 16 | self.max_step = max_step 17 | self.momentum = momentum 18 | 19 | self.__initial_lr = [group['lr'] for group in self.param_groups] 20 | 21 | 22 | def step(self, closure=None): 23 | 24 | if self.global_step < self.max_step: 25 | lr_mult = (1 - self.global_step / self.max_step) ** self.momentum 26 | 27 | for i in range(len(self.param_groups)): 28 | self.param_groups[i]['lr'] = self.__initial_lr[i] * lr_mult 29 | super().step(closure) 30 | 31 | self.global_step += 1 32 | 33 | 34 | class BatchNorm2dFixed(torch.nn.Module): 35 | 36 | def __init__(self, num_features, eps=1e-5): 37 | super(BatchNorm2dFixed, self).__init__() 38 | self.num_features = num_features 39 | self.eps = eps 40 | self.weight = torch.nn.Parameter(torch.Tensor(num_features)) 41 | self.bias = torch.nn.Parameter(torch.Tensor(num_features)) 42 | self.register_buffer('running_mean', torch.zeros(num_features)) 43 | self.register_buffer('running_var', torch.ones(num_features)) 44 | 45 | 46 | def forward(self, input): 47 | 48 | return F.batch_norm( 49 | input, self.running_mean, self.running_var, self.weight, self.bias, 50 | False, eps=self.eps) 51 | 52 | def __call__(self, x): 53 | return self.forward(x) 54 | 55 | 56 | class SegmentationDataset(Dataset): 57 | def __init__(self, img_name_list_path, img_dir, label_dir, rescale=None, flip=False, cropsize=None, 58 | img_transform=None, mask_transform=None): 59 | self.img_name_list_path = img_name_list_path 60 | self.img_dir = img_dir 61 | self.label_dir = label_dir 62 | 63 | self.img_transform = img_transform 64 | self.mask_transform = mask_transform 65 | 66 | self.img_name_list = open(self.img_name_list_path).read().splitlines() 67 | 68 | self.rescale = rescale 69 | self.flip = flip 70 | self.cropsize = cropsize 71 | 72 | def 
__len__(self): 73 | return len(self.img_name_list) 74 | 75 | def __getitem__(self, idx): 76 | 77 | name = self.img_name_list[idx] 78 | 79 | img = Image.open(os.path.join(self.img_dir, name + '.jpg')).convert("RGB") 80 | mask = Image.open(os.path.join(self.label_dir, name + '.png')) 81 | 82 | if self.rescale is not None: 83 | s = self.rescale[0] + random.random() * (self.rescale[1] - self.rescale[0]) 84 | adj_size = (round(img.size[0]*s/8)*8, round(img.size[1]*s/8)*8) 85 | img = img.resize(adj_size, resample=Image.CUBIC) 86 | mask = img.resize(adj_size, resample=Image.NEAREST) 87 | 88 | if self.img_transform is not None: 89 | img = self.img_transform(img) 90 | if self.mask_transform is not None: 91 | mask = self.mask_transform(mask) 92 | 93 | if self.cropsize is not None: 94 | img, mask = imutils.random_crop([img, mask], self.cropsize, (0, 255)) 95 | 96 | mask = imutils.RescaleNearest(0.125)(mask) 97 | 98 | if self.flip is True and bool(random.getrandbits(1)): 99 | img = np.flip(img, 1).copy() 100 | mask = np.flip(mask, 1).copy() 101 | 102 | img = np.transpose(img, (2, 0, 1)) 103 | 104 | return name, img, mask 105 | 106 | 107 | class ExtractAffinityLabelInRadius(): 108 | 109 | def __init__(self, cropsize, radius=5): 110 | self.radius = radius 111 | 112 | self.search_dist = [] 113 | 114 | for x in range(1, radius): 115 | self.search_dist.append((0, x)) 116 | 117 | for y in range(1, radius): 118 | for x in range(-radius+1, radius): 119 | if x*x + y*y < radius*radius: 120 | self.search_dist.append((y, x)) 121 | 122 | self.radius_floor = radius-1 123 | 124 | self.crop_height = cropsize - self.radius_floor 125 | self.crop_width = cropsize - 2 * self.radius_floor 126 | return 127 | 128 | def __call__(self, label): 129 | 130 | labels_from = label[:-self.radius_floor, self.radius_floor:-self.radius_floor] 131 | labels_from = np.reshape(labels_from, [-1]) 132 | 133 | labels_to_list = [] 134 | valid_pair_list = [] 135 | 136 | for dy, dx in self.search_dist: 137 | labels_to = label[dy:dy+self.crop_height, self.radius_floor+dx:self.radius_floor+dx+self.crop_width] 138 | labels_to = np.reshape(labels_to, [-1]) 139 | 140 | valid_pair = np.logical_and(np.less(labels_to, 255), np.less(labels_from, 255)) 141 | 142 | labels_to_list.append(labels_to) 143 | valid_pair_list.append(valid_pair) 144 | 145 | bc_labels_from = np.expand_dims(labels_from, 0) 146 | concat_labels_to = np.stack(labels_to_list) 147 | concat_valid_pair = np.stack(valid_pair_list) 148 | 149 | pos_affinity_label = np.equal(bc_labels_from, concat_labels_to) 150 | 151 | bg_pos_affinity_label = np.logical_and(pos_affinity_label, np.equal(bc_labels_from, 0)).astype(np.float32) 152 | 153 | fg_pos_affinity_label = np.logical_and(np.logical_and(pos_affinity_label, np.not_equal(bc_labels_from, 0)), concat_valid_pair).astype(np.float32) 154 | 155 | neg_affinity_label = np.logical_and(np.logical_not(pos_affinity_label), concat_valid_pair).astype(np.float32) 156 | 157 | return bg_pos_affinity_label, fg_pos_affinity_label, neg_affinity_label 158 | 159 | class AffinityFromMaskDataset(SegmentationDataset): 160 | def __init__(self, img_name_list_path, img_dir, label_dir, rescale=None, flip=False, cropsize=None, 161 | img_transform=None, mask_transform=None, radius=5): 162 | super().__init__(img_name_list_path, img_dir, label_dir, rescale, flip, cropsize, img_transform, mask_transform) 163 | 164 | self.radius = radius 165 | 166 | self.extract_aff_lab_func = ExtractAffinityLabelInRadius(cropsize=cropsize//8, radius=radius) 167 | 168 | def __getitem__(self, 
idx): 169 | name, img, mask = super().__getitem__(idx) 170 | 171 | aff_label = self.extract_aff_lab_func(mask) 172 | 173 | return name, img, aff_label 174 | -------------------------------------------------------------------------------- /scripts/train_iam.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.getcwd()) 4 | 5 | import torch 6 | import argparse 7 | import time 8 | import shutil 9 | import json 10 | import my_optim 11 | import torch.optim as optim 12 | from models import vgg1 13 | import torch.nn as nn 14 | from torchvision import transforms 15 | from torch.utils.data import DataLoader 16 | import torch.nn.functional as F 17 | from torch.autograd import Variable 18 | from utils import AverageMeter 19 | from utils.LoadData import train_data_loader_iam 20 | from tqdm import trange, tqdm 21 | 22 | 23 | def get_arguments(): 24 | parser = argparse.ArgumentParser(description='The Pytorch code of OAA') 25 | parser.add_argument("--img_dir", type=str, default='', help='Directory of training images') 26 | parser.add_argument("--train_list", type=str, default='None') 27 | parser.add_argument("--test_list", type=str, default='None') 28 | parser.add_argument("--batch_size", type=int, default=20) 29 | parser.add_argument("--iter_size", type=int, default=5) 30 | parser.add_argument("--input_size", type=int, default=256) 31 | parser.add_argument("--crop_size", type=int, default=224) 32 | parser.add_argument("--dataset", type=str, default='imagenet') 33 | parser.add_argument("--num_classes", type=int, default=20) 34 | parser.add_argument("--lr", type=float, default=0.001) 35 | parser.add_argument("--weight_decay", type=float, default=0.0005) 36 | parser.add_argument("--decay_points", type=str, default='61') 37 | parser.add_argument("--epoch", type=int, default=15) 38 | parser.add_argument("--num_workers", type=int, default=20) 39 | parser.add_argument("--disp_interval", type=int, default=100) 40 | parser.add_argument("--snapshot_dir", type=str, default='') 41 | parser.add_argument("--resume", type=str, default='False') 42 | parser.add_argument("--global_counter", type=int, default=0) 43 | parser.add_argument("--current_epoch", type=int, default=0) 44 | parser.add_argument("--att_dir", type=str, default='./runs/exp8/') 45 | 46 | return parser.parse_args() 47 | 48 | class ExLoss(nn.Module): 49 | def __init__(self): 50 | super(ExLoss, self).__init__() 51 | 52 | def forward(self, input, target): 53 | assert(input.size() == target.size()) 54 | pos = torch.gt(target, 0.001) 55 | neg = torch.le(target, 0.001) 56 | pos_loss = -target[pos] * torch.log(torch.sigmoid(input[pos])) 57 | neg_loss = -torch.log(1 - torch.sigmoid(input[neg]) + 1e-8) 58 | 59 | loss = 0.0 60 | num_pos = torch.sum(pos) 61 | num_neg = torch.sum(neg) 62 | # print(num_pos, num_neg) 63 | if num_pos > 0: 64 | loss += 1.0 / num_pos.float() * torch.sum(pos_loss) 65 | if num_neg > 0: 66 | loss += 1.0 / num_neg.float() * torch.sum(neg_loss) 67 | 68 | return loss 69 | 70 | 71 | def save_checkpoint(args, state, is_best, filename='checkpoint.pth.tar'): 72 | savepath = os.path.join(args.snapshot_dir, filename) 73 | torch.save(state, savepath) 74 | if is_best: 75 | shutil.copyfile(savepath, os.path.join(args.snapshot_dir, 'model_best.pth.tar')) 76 | 77 | def get_model(args): 78 | model = vgg1.vgg16(pretrained=True, num_classes=args.num_classes) 79 | model = torch.nn.DataParallel(model).cuda() 80 | param_groups = model.module.get_parameter_groups() 81 | 
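    # get_parameter_groups() returns four groups: weights and biases of the
    # pretrained VGG layers, and weights and biases of the extra (randomly
    # initialized) convolutions. The optimizer below trains the backbone at the
    # base LR, its biases at 2x, and the new head at 10x/20x (DeepLab-style recipe).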
optimizer = optim.SGD([ 82 | {'params': param_groups[0], 'lr': args.lr}, 83 | {'params': param_groups[1], 'lr': 2*args.lr}, 84 | {'params': param_groups[2], 'lr': 10*args.lr}, 85 | {'params': param_groups[3], 'lr': 20*args.lr}], momentum=0.9, weight_decay=args.weight_decay, nesterov=True) 86 | criterion = ExLoss() 87 | return model, optimizer, criterion 88 | 89 | def train(args): 90 | batch_time = AverageMeter() 91 | losses = AverageMeter() 92 | 93 | total_epoch = args.epoch 94 | global_counter = args.global_counter 95 | current_epoch = args.current_epoch 96 | 97 | train_loader = train_data_loader_iam(args) 98 | max_step = total_epoch*len(train_loader) 99 | args.max_step = max_step 100 | print('Max step:', max_step) 101 | 102 | model, optimizer, criterion = get_model(args) 103 | print(model) 104 | model.train() 105 | end = time.time() 106 | 107 | while current_epoch < total_epoch: 108 | model.train() 109 | losses.reset() 110 | batch_time.reset() 111 | res = my_optim.reduce_lr(args, optimizer, current_epoch) 112 | steps_per_epoch = len(train_loader) 113 | flag = 0 114 | 115 | for idx, dat in enumerate(train_loader): 116 | img, label = dat 117 | label = label.cuda(non_blocking=True) 118 | logits = model(img) 119 | 120 | if len(logits.shape) == 1: 121 | logits = logits.reshape(label.shape) 122 | loss_val = criterion(logits, label) 123 | loss_val.backward() 124 | 125 | flag += 1 126 | if flag == args.iter_size: 127 | optimizer.step() 128 | optimizer.zero_grad() 129 | flag = 0 130 | 131 | 132 | losses.update(loss_val.data.item(), img.size()[0]) 133 | batch_time.update(time.time() - end) 134 | end = time.time() 135 | 136 | global_counter += 1 137 | if global_counter % 1000 == 0: 138 | losses.reset() 139 | 140 | if global_counter % args.disp_interval == 0: 141 | print('Epoch: [{}][{}/{}]\t' 142 | 'LR: {:.5f}\t' 143 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format( 144 | current_epoch, global_counter%len(train_loader), len(train_loader), 145 | optimizer.param_groups[0]['lr'], loss=losses)) 146 | 147 | if current_epoch == args.epoch-1: 148 | save_checkpoint(args, 149 | { 150 | 'epoch': current_epoch, 151 | 'global_counter': global_counter, 152 | 'state_dict':model.state_dict(), 153 | 'optimizer':optimizer.state_dict() 154 | }, is_best=False, 155 | filename='%s_epoch_%d.pth' %(args.dataset, current_epoch)) 156 | current_epoch += 1 157 | 158 | if __name__ == '__main__': 159 | args = get_arguments() 160 | print('Running parameters:\n', args) 161 | if not os.path.exists(args.snapshot_dir): 162 | os.makedirs(args.snapshot_dir) 163 | train(args) 164 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.getcwd()) 4 | 5 | import torch 6 | import argparse 7 | import time 8 | import shutil 9 | import json 10 | import my_optim 11 | import torch.optim as optim 12 | from models import vgg 13 | import torch.nn as nn 14 | from torchvision import transforms 15 | from torch.utils.data import DataLoader 16 | import torch.nn.functional as F 17 | from torch.autograd import Variable 18 | from utils import AverageMeter 19 | from utils.LoadData import train_data_loader 20 | from tqdm import trange, tqdm 21 | 22 | 23 | def get_arguments(): 24 | parser = argparse.ArgumentParser(description='The Pytorch code of OAA') 25 | parser.add_argument("--img_dir", type=str, default='', help='Directory of training images') 26 | 
parser.add_argument("--train_list", type=str, default='None') 27 | parser.add_argument("--test_list", type=str, default='None') 28 | parser.add_argument("--batch_size", type=int, default=1) 29 | parser.add_argument("--iter_size", type=int, default=5) 30 | parser.add_argument("--input_size", type=int, default=256) 31 | parser.add_argument("--crop_size", type=int, default=224) 32 | parser.add_argument("--dataset", type=str, default='imagenet') 33 | parser.add_argument("--num_classes", type=int, default=20) 34 | parser.add_argument("--lr", type=float, default=0.001) 35 | parser.add_argument("--weight_decay", type=float, default=0.0005) 36 | parser.add_argument("--decay_points", type=str, default='61') 37 | parser.add_argument("--epoch", type=int, default=15) 38 | parser.add_argument("--num_workers", type=int, default=20) 39 | parser.add_argument("--disp_interval", type=int, default=100) 40 | parser.add_argument("--snapshot_dir", type=str, default='') 41 | parser.add_argument("--resume", type=str, default='False') 42 | parser.add_argument("--global_counter", type=int, default=0) 43 | parser.add_argument("--current_epoch", type=int, default=0) 44 | parser.add_argument("--att_dir", type=str, default='./runs/exp1/att/') 45 | parser.add_argument("--accu_dir", type=str, default='./runs/exp1/accu_att/') 46 | parser.add_argument('--drop_layer', action='store_true') 47 | parser.add_argument('--drop_rate', type=float, default=0.5) 48 | parser.add_argument('--drop_threshold', type=float, default=0.6) 49 | 50 | return parser.parse_args() 51 | 52 | def save_checkpoint(args, state, is_best, filename='checkpoint.pth.tar'): 53 | savepath = os.path.join(args.snapshot_dir, filename) 54 | torch.save(state, savepath) 55 | if is_best: 56 | shutil.copyfile(savepath, os.path.join(args.snapshot_dir, 'model_best.pth.tar')) 57 | 58 | def get_model(args): 59 | model = vgg.vgg16(pretrained=True, num_classes=args.num_classes, att_dir=args.att_dir, accu_dir=args.accu_dir, 60 | training_epoch=args.epoch, drop_layer=args.drop_layer, drop_rate=args.drop_rate, drop_threshold=args.drop_threshold) 61 | model = torch.nn.DataParallel(model).cuda() 62 | param_groups = model.module.get_parameter_groups() 63 | optimizer = optim.SGD([ 64 | {'params': param_groups[0], 'lr': args.lr}, 65 | {'params': param_groups[1], 'lr': 2*args.lr}, 66 | {'params': param_groups[2], 'lr': 10*args.lr}, 67 | {'params': param_groups[3], 'lr': 20*args.lr}], momentum=0.9, weight_decay=args.weight_decay, nesterov=True) 68 | 69 | return model, optimizer 70 | 71 | 72 | def validate(model, val_loader): 73 | 74 | print('\nvalidating ... 
', flush=True, end='') 75 | val_loss = AverageMeter() 76 | model.eval() 77 | 78 | with torch.no_grad(): 79 | for idx, dat in tqdm(enumerate(val_loader)): 80 | img_name, img, label = dat 81 | label = label.cuda(non_blocking=True) 82 | logits = model(img) 83 | if len(logits.shape) == 1: 84 | logits = logits.reshape(label.shape) 85 | loss_val = F.multilabel_soft_margin_loss(logits, label) 86 | val_loss.update(loss_val.data.item(), img.size()[0]) 87 | 88 | print('validating loss:', val_loss.avg) 89 | 90 | def train(args): 91 | batch_time = AverageMeter() 92 | losses = AverageMeter() 93 | 94 | total_epoch = args.epoch 95 | global_counter = args.global_counter 96 | current_epoch = args.current_epoch 97 | 98 | train_loader, val_loader = train_data_loader(args) 99 | max_step = total_epoch*len(train_loader) 100 | args.max_step = max_step 101 | print('Max step:', max_step) 102 | 103 | model, optimizer = get_model(args) 104 | print(model) 105 | model.train() 106 | end = time.time() 107 | 108 | while current_epoch < total_epoch: 109 | model.train() 110 | losses.reset() 111 | batch_time.reset() 112 | res = my_optim.reduce_lr(args, optimizer, current_epoch) 113 | steps_per_epoch = len(train_loader) 114 | 115 | validate(model, val_loader) 116 | index = 0 117 | flag = 0 118 | for idx, dat in enumerate(train_loader): 119 | img_name, img, label = dat 120 | label = label.cuda(non_blocking=True) 121 | 122 | logits = model(img, current_epoch, label, index) 123 | index += args.batch_size 124 | 125 | if len(logits.shape) == 1: 126 | logits = logits.reshape(label.shape) 127 | loss_val = F.multilabel_soft_margin_loss(logits, label) / args.iter_size 128 | loss_val.backward() 129 | 130 | flag += 1 131 | if flag == args.iter_size: 132 | optimizer.step() 133 | optimizer.zero_grad() 134 | flag = 0 135 | 136 | losses.update(loss_val.data.item(), img.size()[0]) 137 | batch_time.update(time.time() - end) 138 | end = time.time() 139 | 140 | global_counter += 1 141 | if global_counter % 1000 == 0: 142 | losses.reset() 143 | 144 | if global_counter % args.disp_interval == 0: 145 | print('Epoch: [{}][{}/{}]\t' 146 | 'LR: {:.5f}\t' 147 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format( 148 | current_epoch, global_counter%len(train_loader), len(train_loader), 149 | optimizer.param_groups[0]['lr'], loss=losses)) 150 | 151 | if current_epoch == args.epoch-1: 152 | save_checkpoint(args, 153 | { 154 | 'epoch': current_epoch, 155 | 'global_counter': global_counter, 156 | 'state_dict':model.state_dict(), 157 | 'optimizer':optimizer.state_dict() 158 | }, is_best=False, 159 | filename='%s_epoch_%d.pth' %(args.dataset, current_epoch)) 160 | current_epoch += 1 161 | 162 | if __name__ == '__main__': 163 | args = get_arguments() 164 | print('Running parameters:\n', args) 165 | if not os.path.exists(args.snapshot_dir): 166 | os.makedirs(args.snapshot_dir) 167 | train(args) 168 | -------------------------------------------------------------------------------- /models/vgg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | import torch.nn.functional as F 5 | import math 6 | import cv2 7 | import numpy as np 8 | import os 9 | import random 10 | 11 | model_urls = {'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth'} 12 | 13 | class VGG(nn.Module): 14 | 15 | def __init__(self, features, num_classes=1000, init_weights=True, att_dir='./runs/', accu_dir='./runs/', 16 | training_epoch=15, drop_layer=False, 
drop_rate=0.0, drop_threshold=0.0): 17 | super(VGG, self).__init__() 18 | self.features = features 19 | self.extra_convs = nn.Sequential( 20 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 21 | nn.ReLU(True), 22 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 23 | nn.ReLU(True), 24 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 25 | nn.ReLU(True), 26 | nn.Conv2d(512,20,1) 27 | ) 28 | self._initialize_weights() 29 | self.training_epoch = training_epoch 30 | self.att_dir = att_dir 31 | self.accu_dir = accu_dir 32 | 33 | self.drop_layer = drop_layer 34 | self.drop_rate = drop_rate 35 | self.drop_threshold = drop_threshold 36 | 37 | if self.drop_layer and not os.path.exists(self.att_dir): 38 | os.makedirs(self.att_dir) 39 | if not os.path.exists(self.accu_dir): 40 | os.makedirs(self.accu_dir) 41 | 42 | 43 | def forward(self, x, epoch=1, label=None, index=None): 44 | h, w = x.shape[-2:] 45 | if self.drop_layer and label!=None: 46 | if random.uniform(0, 1) < self.drop_rate: 47 | ind = torch.nonzero(label) 48 | for i in range(ind.shape[0]): 49 | batch_index, la = ind[i] 50 | att_img_path = '{}/{}_{}.png'.format(self.att_dir, batch_index+index, la) 51 | if os.path.exists(att_img_path): 52 | att = cv2.resize(cv2.imread(att_img_path, 0), (w, h)) / 255.0 53 | x[:, :, att > self.drop_threshold] = 0.0 54 | 55 | x = self.features(x) 56 | x = self.extra_convs(x) 57 | 58 | self.map1 = x.clone().detach() 59 | x = F.avg_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=0) 60 | x = x.view(-1, 20) 61 | 62 | ### the online attention accumulation process 63 | pre_probs = x.clone().detach() 64 | probs = torch.sigmoid(pre_probs) # compute the prob 65 | pred_inds_sort = torch.argsort(-probs) 66 | 67 | if index != None and epoch > 0: 68 | atts = self.map1 69 | atts[atts < 0] = 0 70 | ind = torch.nonzero(label) 71 | num_labels = torch.sum(label, dim=1).long() 72 | 73 | for i in range(ind.shape[0]): 74 | batch_index, la = ind[i] 75 | pred_ind_select = pred_inds_sort[batch_index, :num_labels[batch_index]] 76 | 77 | accu_map_name = '{}/{}_{}.png'.format(self.accu_dir, batch_index+index, la) 78 | att_map_name = '{}/{}_{}.png'.format(self.att_dir, batch_index+index, la) 79 | att = atts[batch_index, la].cpu().data.numpy() 80 | att = att / (att.max() + 1e-8) * 255 81 | 82 | # if this is the last epoch and the image without any accumulation 83 | if epoch == self.training_epoch - 1 and not os.path.exists(accu_map_name): 84 | cv2.imwrite(accu_map_name, att) 85 | continue 86 | 87 | #naive filter out the low quality attention map with prob 88 | if la not in list(pred_ind_select): 89 | continue 90 | 91 | if not os.path.exists(accu_map_name): 92 | cv2.imwrite(accu_map_name, att) 93 | else: 94 | accu_att = cv2.imread(accu_map_name, 0) 95 | accu_att = np.maximum(accu_att, att) 96 | cv2.imwrite(accu_map_name, accu_att) 97 | 98 | # save current attention maps for oaa drop layer 99 | if self.drop_layer: 100 | cv2.imwrite(att_map_name, att) 101 | 102 | ############################################## 103 | 104 | return x 105 | 106 | def get_heatmaps(self): 107 | return self.map1 108 | 109 | def _initialize_weights(self): 110 | for m in self.modules(): 111 | if isinstance(m, nn.Conv2d): 112 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 113 | m.weight.data.normal_(0, 0.01) 114 | # m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 115 | if m.bias is not None: 116 | m.bias.data.zero_() 117 | elif isinstance(m, nn.BatchNorm2d): 118 | m.weight.data.fill_(1) 119 | m.bias.data.zero_() 120 | elif isinstance(m, nn.Linear): 121 | m.weight.data.normal_(0, 0.01) 122 | m.bias.data.zero_() 123 | 124 | def get_parameter_groups(self): 125 | groups = ([], [], [], []) 126 | 127 | for name, value in self.named_parameters(): 128 | 129 | if 'extra' in name: 130 | if 'weight' in name: 131 | groups[2].append(value) 132 | else: 133 | groups[3].append(value) 134 | else: 135 | if 'weight' in name: 136 | groups[0].append(value) 137 | else: 138 | groups[1].append(value) 139 | return groups 140 | 141 | 142 | def make_layers(cfg, batch_norm=False): 143 | layers = [] 144 | in_channels = 3 145 | for i, v in enumerate(cfg): 146 | if v == 'M': 147 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 148 | elif v == 'N': 149 | layers += [nn.MaxPool2d(kernel_size=3, stride=1, padding=1)] 150 | else: 151 | if i > 13: 152 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, dilation=2, padding=2) 153 | else: 154 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 155 | if batch_norm: 156 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 157 | else: 158 | layers += [conv2d, nn.ReLU(inplace=True)] 159 | in_channels = v 160 | return nn.Sequential(*layers) 161 | 162 | 163 | cfg = { 164 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 165 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 166 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 167 | 'D1': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'N', 512, 512, 512], 168 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 169 | } 170 | 171 | 172 | def vgg16(pretrained=False, **kwargs): 173 | model = VGG(make_layers(cfg['D1']), **kwargs) 174 | if pretrained: 175 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16']), strict=False) 176 | return model 177 | -------------------------------------------------------------------------------- /deeplab-pytorch/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 07 January 2019 7 | 8 | from __future__ import absolute_import, division, print_function 9 | 10 | import click 11 | import cv2 12 | import matplotlib 13 | import matplotlib.cm as cm 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | from omegaconf import OmegaConf 20 | 21 | from libs.models import * 22 | from libs.utils import DenseCRF 23 | 24 | 25 | def get_device(cuda): 26 | cuda = cuda and torch.cuda.is_available() 27 | device = torch.device("cuda" if cuda else "cpu") 28 | if cuda: 29 | current_device = torch.cuda.current_device() 30 | print("Device:", torch.cuda.get_device_name(current_device)) 31 | else: 32 | print("Device: CPU") 33 | return device 34 | 35 | 36 | def get_classtable(CONFIG): 37 | with open(CONFIG.DATASET.LABELS) as f: 38 | classes = {} 39 | for label in f: 40 | label = label.rstrip().split("\t") 41 | classes[int(label[0])] = label[1].split(",")[0] 42 | return classes 43 | 44 | 45 | def setup_postprocessor(CONFIG): 46 | # CRF post-processor 47 | postprocessor = DenseCRF( 48 | iter_max=CONFIG.CRF.ITER_MAX, 49 | 
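        # pos_* values parameterize the Gaussian (smoothness) pairwise term and
        # bi_* values the bilateral (appearance) term; all of them are read from
        # the CRF block of the dataset configuration YAML.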
pos_xy_std=CONFIG.CRF.POS_XY_STD, 50 | pos_w=CONFIG.CRF.POS_W, 51 | bi_xy_std=CONFIG.CRF.BI_XY_STD, 52 | bi_rgb_std=CONFIG.CRF.BI_RGB_STD, 53 | bi_w=CONFIG.CRF.BI_W, 54 | ) 55 | return postprocessor 56 | 57 | 58 | def preprocessing(image, device, CONFIG): 59 | # Resize 60 | scale = CONFIG.IMAGE.SIZE.TEST / max(image.shape[:2]) 61 | image = cv2.resize(image, dsize=None, fx=scale, fy=scale) 62 | raw_image = image.astype(np.uint8) 63 | 64 | # Subtract mean values 65 | image = image.astype(np.float32) 66 | image -= np.array( 67 | [ 68 | float(CONFIG.IMAGE.MEAN.B), 69 | float(CONFIG.IMAGE.MEAN.G), 70 | float(CONFIG.IMAGE.MEAN.R), 71 | ] 72 | ) 73 | 74 | # Convert to torch.Tensor and add "batch" axis 75 | image = torch.from_numpy(image.transpose(2, 0, 1)).float().unsqueeze(0) 76 | image = image.to(device) 77 | 78 | return image, raw_image 79 | 80 | 81 | def inference(model, image, raw_image=None, postprocessor=None): 82 | _, _, H, W = image.shape 83 | 84 | # Image -> Probability map 85 | logits = model(image) 86 | logits = F.interpolate(logits, size=(H, W), mode="bilinear", align_corners=False) 87 | probs = F.softmax(logits, dim=1)[0] 88 | probs = probs.cpu().numpy() 89 | 90 | # Refine the prob map with CRF 91 | if postprocessor and raw_image is not None: 92 | probs = postprocessor(raw_image, probs) 93 | 94 | labelmap = np.argmax(probs, axis=0) 95 | 96 | return labelmap 97 | 98 | 99 | @click.group() 100 | @click.pass_context 101 | def main(ctx): 102 | """ 103 | Demo with a trained model 104 | """ 105 | 106 | print("Mode:", ctx.invoked_subcommand) 107 | 108 | 109 | @main.command() 110 | @click.option( 111 | "-c", 112 | "--config-path", 113 | type=click.File(), 114 | required=True, 115 | help="Dataset configuration file in YAML", 116 | ) 117 | @click.option( 118 | "-m", 119 | "--model-path", 120 | type=click.Path(exists=True), 121 | required=True, 122 | help="PyTorch model to be loaded", 123 | ) 124 | @click.option( 125 | "-i", 126 | "--image-path", 127 | type=click.Path(exists=True), 128 | required=True, 129 | help="Image to be processed", 130 | ) 131 | @click.option( 132 | "--cuda/--cpu", default=True, help="Enable CUDA if available [default: --cuda]" 133 | ) 134 | @click.option("--crf", is_flag=True, show_default=True, help="CRF post-processing") 135 | def single(config_path, model_path, image_path, cuda, crf): 136 | """ 137 | Inference from a single image 138 | """ 139 | 140 | # Setup 141 | CONFIG = OmegaConf.load(config_path) 142 | device = get_device(cuda) 143 | torch.set_grad_enabled(False) 144 | 145 | classes = get_classtable(CONFIG) 146 | postprocessor = setup_postprocessor(CONFIG) if crf else None 147 | 148 | model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES) 149 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage) 150 | model.load_state_dict(state_dict) 151 | model.eval() 152 | model.to(device) 153 | print("Model:", CONFIG.MODEL.NAME) 154 | 155 | # Inference 156 | image = cv2.imread(image_path, cv2.IMREAD_COLOR) 157 | image, raw_image = preprocessing(image, device, CONFIG) 158 | labelmap = inference(model, image, raw_image, postprocessor) 159 | labels = np.unique(labelmap) 160 | 161 | # Show result for each class 162 | rows = np.floor(np.sqrt(len(labels) + 1)) 163 | cols = np.ceil((len(labels) + 1) / rows) 164 | 165 | plt.figure(figsize=(10, 10)) 166 | ax = plt.subplot(rows, cols, 1) 167 | ax.set_title("Input image") 168 | ax.imshow(raw_image[:, :, ::-1]) 169 | ax.axis("off") 170 | 171 | for i, label in enumerate(labels): 172 | mask = 
labelmap == label 173 | ax = plt.subplot(rows, cols, i + 2) 174 | ax.set_title(classes[label]) 175 | ax.imshow(raw_image[..., ::-1]) 176 | ax.imshow(mask.astype(np.float32), alpha=0.5) 177 | ax.axis("off") 178 | 179 | plt.tight_layout() 180 | plt.show() 181 | 182 | 183 | @main.command() 184 | @click.option( 185 | "-c", 186 | "--config-path", 187 | type=click.File(), 188 | required=True, 189 | help="Dataset configuration file in YAML", 190 | ) 191 | @click.option( 192 | "-m", 193 | "--model-path", 194 | type=click.Path(exists=True), 195 | required=True, 196 | help="PyTorch model to be loaded", 197 | ) 198 | @click.option( 199 | "--cuda/--cpu", default=True, help="Enable CUDA if available [default: --cuda]" 200 | ) 201 | @click.option("--crf", is_flag=True, show_default=True, help="CRF post-processing") 202 | @click.option("--camera-id", type=int, default=0, show_default=True, help="Device ID") 203 | def live(config_path, model_path, cuda, crf, camera_id): 204 | """ 205 | Inference from camera stream 206 | """ 207 | 208 | # Setup 209 | CONFIG = OmegaConf.load(config_path) 210 | device = get_device(cuda) 211 | torch.set_grad_enabled(False) 212 | torch.backends.cudnn.benchmark = True 213 | 214 | classes = get_classtable(CONFIG) 215 | postprocessor = setup_postprocessor(CONFIG) if crf else None 216 | 217 | model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES) 218 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage) 219 | model.load_state_dict(state_dict) 220 | model.eval() 221 | model.to(device) 222 | print("Model:", CONFIG.MODEL.NAME) 223 | 224 | # UVC camera stream 225 | cap = cv2.VideoCapture(camera_id) 226 | cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"YUYV")) 227 | 228 | def colorize(labelmap): 229 | # Assign a unique color to each label 230 | labelmap = labelmap.astype(np.float32) / CONFIG.DATASET.N_CLASSES 231 | colormap = cm.jet_r(labelmap)[..., :-1] * 255.0 232 | return np.uint8(colormap) 233 | 234 | def mouse_event(event, x, y, flags, labelmap): 235 | # Show a class name of a mouse-overed pixel 236 | label = labelmap[y, x] 237 | name = classes[label] 238 | print(name) 239 | 240 | window_name = "{} + {}".format(CONFIG.MODEL.NAME, CONFIG.DATASET.NAME) 241 | cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE) 242 | 243 | while True: 244 | _, frame = cap.read() 245 | image, raw_image = preprocessing(frame, device, CONFIG) 246 | labelmap = inference(model, image, raw_image, postprocessor) 247 | colormap = colorize(labelmap) 248 | 249 | # Register mouse callback function 250 | cv2.setMouseCallback(window_name, mouse_event, labelmap) 251 | 252 | # Overlay prediction 253 | cv2.addWeighted(colormap, 0.5, raw_image, 0.5, 0.0, raw_image) 254 | 255 | # Quit by pressing "q" key 256 | cv2.imshow(window_name, raw_image) 257 | if cv2.waitKey(10) == ord("q"): 258 | break 259 | 260 | 261 | if __name__ == "__main__": 262 | main() 263 | -------------------------------------------------------------------------------- /utils/imutils.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | import random 3 | import numpy as np 4 | 5 | class RandomResizeLong(): 6 | 7 | def __init__(self, min_long, max_long): 8 | self.min_long = min_long 9 | self.max_long = max_long 10 | 11 | def __call__(self, img): 12 | 13 | target_long = random.randint(self.min_long, self.max_long) 14 | w, h = img.size 15 | 16 | if w < h: 17 | target_shape = (int(round(w * target_long / h)), target_long) 18 | else: 19 | target_shape = 
(target_long, int(round(h * target_long / w))) 20 | 21 | img = img.resize(target_shape, resample=PIL.Image.CUBIC) 22 | return img 23 | 24 | class ResizeShort(): 25 | def __init__(self, short_size): 26 | self.short_size = short_size 27 | 28 | def __call__(self, img): 29 | 30 | target_long = self.short_size 31 | w, h = img.size 32 | 33 | if w < h: 34 | target_shape = (target_long, int(round(h * target_long / w))) 35 | else: 36 | target_shape = (int(round(w * target_long / h)), target_long) 37 | 38 | img = img.resize(target_shape, resample=PIL.Image.CUBIC) 39 | return img 40 | 41 | class RandomCrop(): 42 | 43 | def __init__(self, cropsize): 44 | self.cropsize = cropsize 45 | 46 | def __call__(self, imgarr): 47 | 48 | c, h, w = imgarr.shape 49 | 50 | ch = min(self.cropsize, h) 51 | cw = min(self.cropsize, w) 52 | 53 | w_space = w - self.cropsize 54 | h_space = h - self.cropsize 55 | 56 | if w_space > 0: 57 | cont_left = 0 58 | img_left = random.randrange(w_space+1) 59 | else: 60 | cont_left = random.randrange(-w_space+1) 61 | img_left = 0 62 | 63 | if h_space > 0: 64 | cont_top = 0 65 | img_top = random.randrange(h_space+1) 66 | else: 67 | cont_top = random.randrange(-h_space+1) 68 | img_top = 0 69 | 70 | container = np.zeros((imgarr.shape[0], self.cropsize, self.cropsize), np.float32) 71 | container[:, cont_top:cont_top+ch, cont_left:cont_left+cw] = \ 72 | imgarr[:, img_top:img_top+ch, img_left:img_left+cw] 73 | 74 | return container 75 | 76 | def get_random_crop_box(imgsize, cropsize): 77 | h, w = imgsize 78 | 79 | ch = min(cropsize, h) 80 | cw = min(cropsize, w) 81 | 82 | w_space = w - cropsize 83 | h_space = h - cropsize 84 | 85 | if w_space > 0: 86 | cont_left = 0 87 | img_left = random.randrange(w_space + 1) 88 | else: 89 | cont_left = random.randrange(-w_space + 1) 90 | img_left = 0 91 | 92 | if h_space > 0: 93 | cont_top = 0 94 | img_top = random.randrange(h_space + 1) 95 | else: 96 | cont_top = random.randrange(-h_space + 1) 97 | img_top = 0 98 | 99 | return cont_top, cont_top+ch, cont_left, cont_left+cw, img_top, img_top+ch, img_left, img_left+cw 100 | 101 | def crop_with_box(img, box): 102 | if len(img.shape) == 3: 103 | img_cont = np.zeros((max(box[1]-box[0], box[4]-box[5]), max(box[3]-box[2], box[7]-box[6]), img.shape[-1]), dtype=img.dtype) 104 | else: 105 | img_cont = np.zeros((max(box[1] - box[0], box[4] - box[5]), max(box[3] - box[2], box[7] - box[6])), dtype=img.dtype) 106 | img_cont[box[0]:box[1], box[2]:box[3]] = img[box[4]:box[5], box[6]:box[7]] 107 | return img_cont 108 | 109 | 110 | def random_crop(images, cropsize, fills): 111 | if isinstance(images[0], PIL.Image.Image): 112 | imgsize = images[0].size[::-1] 113 | else: 114 | imgsize = images[0].shape[:2] 115 | box = get_random_crop_box(imgsize, cropsize) 116 | 117 | new_images = [] 118 | for img, f in zip(images, fills): 119 | 120 | if isinstance(img, PIL.Image.Image): 121 | img = img.crop((box[6], box[4], box[7], box[5])) 122 | cont = PIL.Image.new(img.mode, (cropsize, cropsize)) 123 | cont.paste(img, (box[2], box[0])) 124 | new_images.append(cont) 125 | 126 | else: 127 | if len(img.shape) == 3: 128 | cont = np.ones((cropsize, cropsize, img.shape[2]), img.dtype)*f 129 | else: 130 | cont = np.ones((cropsize, cropsize), img.dtype)*f 131 | cont[box[0]:box[1], box[2]:box[3]] = img[box[4]:box[5], box[6]:box[7]] 132 | new_images.append(cont) 133 | 134 | return new_images 135 | 136 | 137 | class AvgPool2d(): 138 | 139 | def __init__(self, ksize): 140 | self.ksize = ksize 141 | 142 | def __call__(self, img): 143 | import 
skimage.measure 144 | 145 | return skimage.measure.block_reduce(img, (self.ksize, self.ksize, 1), np.mean) 146 | 147 | 148 | class RandomHorizontalFlip(): 149 | def __init__(self): 150 | return 151 | 152 | def __call__(self, img): 153 | if bool(random.getrandbits(1)): 154 | img = np.fliplr(img).copy() 155 | return img 156 | 157 | 158 | class CenterCrop(): 159 | 160 | def __init__(self, cropsize, default_value=0): 161 | self.cropsize = cropsize 162 | self.default_value = default_value 163 | 164 | def __call__(self, npimg): 165 | 166 | h, w = npimg.shape[:2] 167 | 168 | ch = min(self.cropsize, h) 169 | cw = min(self.cropsize, w) 170 | 171 | sh = h - self.cropsize 172 | sw = w - self.cropsize 173 | 174 | if sw > 0: 175 | cont_left = 0 176 | img_left = int(round(sw / 2)) 177 | else: 178 | cont_left = int(round(-sw / 2)) 179 | img_left = 0 180 | 181 | if sh > 0: 182 | cont_top = 0 183 | img_top = int(round(sh / 2)) 184 | else: 185 | cont_top = int(round(-sh / 2)) 186 | img_top = 0 187 | 188 | if len(npimg.shape) == 2: 189 | container = np.ones((self.cropsize, self.cropsize), npimg.dtype)*self.default_value 190 | else: 191 | container = np.ones((self.cropsize, self.cropsize, npimg.shape[2]), npimg.dtype)*self.default_value 192 | 193 | container[cont_top:cont_top+ch, cont_left:cont_left+cw] = \ 194 | npimg[img_top:img_top+ch, img_left:img_left+cw] 195 | 196 | return container 197 | 198 | 199 | def HWC_to_CHW(img): 200 | return np.transpose(img, (2, 0, 1)) 201 | 202 | 203 | class RescaleNearest(): 204 | def __init__(self, scale): 205 | self.scale = scale 206 | 207 | def __call__(self, npimg): 208 | import cv2 209 | return cv2.resize(npimg, None, fx=self.scale, fy=self.scale, interpolation=cv2.INTER_NEAREST) 210 | 211 | 212 | def bb_IOU(boxA, boxB): 213 | boxA = [float(aa) for aa in boxA] 214 | boxB = [float(bb) for bb in boxB] 215 | 216 | xA = max(boxA[0], boxB[0]) 217 | yA = max(boxA[1], boxB[1]) 218 | xB = min(boxA[2], boxB[2]) 219 | yB = min(boxA[3], boxB[3]) 220 | 221 | if xA >= xB or yA >= yB: 222 | return 0, 0 223 | # compute the area of intersection rectangle 224 | interArea = (xB - xA + 1) * (yB - yA + 1) 225 | 226 | # compute the area of both the prediction and ground-truth 227 | # rectangles 228 | boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1) 229 | boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1) 230 | 231 | # compute the intersection over union by taking the intersection 232 | # area and dividing it by the sum of prediction + ground-truth 233 | # areas - the interesection area 234 | iou = interArea / float(boxAArea + boxBArea - interArea) 235 | recall = interArea / float(boxAArea) 236 | # return the intersection over union value 237 | return iou, recall 238 | 239 | def large_rect(rect): 240 | # find largest recteangles 241 | large_area = 0 242 | target = 0 243 | for i in range(len(rect)): 244 | area = rect[i][2]*rect[i][3] 245 | if large_area < area: 246 | large_area = area 247 | target = i 248 | 249 | x = rect[target][0] 250 | y = rect[target][1] 251 | w = rect[target][2] 252 | h = rect[target][3] 253 | 254 | return x, y, w, h 255 | 256 | 257 | def crf_inference(img, probs, t=10, scale_factor=1, labels=21): 258 | import pydensecrf.densecrf as dcrf 259 | from pydensecrf.utils import unary_from_softmax 260 | 261 | h, w = img.shape[:2] 262 | n_labels = labels 263 | 264 | d = dcrf.DenseCRF2D(w, h, n_labels) 265 | 266 | unary = unary_from_softmax(probs) 267 | unary = np.ascontiguousarray(unary) 268 | 269 | d.setUnaryEnergy(unary) 270 | 
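    # Pairwise potentials: a Gaussian kernel enforcing spatial smoothness and a
    # bilateral kernel encouraging similarly colored pixels to share a label; the
    # spatial bandwidths are divided by scale_factor so the CRF behaves consistently
    # on rescaled probability maps.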
d.addPairwiseGaussian(sxy=3/scale_factor, compat=3) 271 | d.addPairwiseBilateral(sxy=80/scale_factor, srgb=13, rgbim=np.copy(img), compat=10) 272 | Q = d.inference(t) 273 | 274 | return np.array(Q).reshape((n_labels, h, w)) 275 | -------------------------------------------------------------------------------- /deeplab-pytorch/convert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-11-15 7 | 8 | from __future__ import absolute_import, division, print_function 9 | 10 | import re 11 | import traceback 12 | from collections import Counter, OrderedDict 13 | 14 | import click 15 | import numpy as np 16 | import torch 17 | from addict import Dict 18 | 19 | from libs import caffe_pb2 20 | from libs.models import DeepLabV1_ResNet101, DeepLabV2_ResNet101_MSC 21 | 22 | 23 | def parse_caffemodel(model_path): 24 | caffemodel = caffe_pb2.NetParameter() 25 | with open(model_path, "rb") as f: 26 | caffemodel.MergeFromString(f.read()) 27 | 28 | # Check trainable layers 29 | print( 30 | *Counter( 31 | [(layer.type, len(layer.blobs)) for layer in caffemodel.layer] 32 | ).most_common(), 33 | sep="\n", 34 | ) 35 | 36 | params = OrderedDict() 37 | previous_layer_type = None 38 | for layer in caffemodel.layer: 39 | # Skip the shared branch 40 | if "res075" in layer.name or "res05" in layer.name: 41 | continue 42 | 43 | print( 44 | "\033[34m[Caffe]\033[00m", 45 | "{} ({}): {}".format(layer.name, layer.type, len(layer.blobs)), 46 | ) 47 | 48 | # Convolution or Dilated Convolution 49 | if "Convolution" in layer.type: 50 | params[layer.name] = {} 51 | params[layer.name]["kernel_size"] = layer.convolution_param.kernel_size[0] 52 | params[layer.name]["weight"] = list(layer.blobs[0].data) 53 | if len(layer.blobs) == 2: 54 | params[layer.name]["bias"] = list(layer.blobs[1].data) 55 | if len(layer.convolution_param.stride) == 1: # or [] 56 | params[layer.name]["stride"] = layer.convolution_param.stride[0] 57 | else: 58 | params[layer.name]["stride"] = 1 59 | if len(layer.convolution_param.pad) == 1: # or [] 60 | params[layer.name]["padding"] = layer.convolution_param.pad[0] 61 | else: 62 | params[layer.name]["padding"] = 0 63 | if isinstance(layer.convolution_param.dilation, int): 64 | params[layer.name]["dilation"] = layer.convolution_param.dilation 65 | elif len(layer.convolution_param.dilation) == 1: 66 | params[layer.name]["dilation"] = layer.convolution_param.dilation[0] 67 | else: 68 | params[layer.name]["dilation"] = 1 69 | # Fully-connected 70 | elif "InnerProduct" in layer.type: 71 | params[layer.name] = {} 72 | params[layer.name]["weight"] = list(layer.blobs[0].data) 73 | if len(layer.blobs) == 2: 74 | params[layer.name]["bias"] = list(layer.blobs[1].data) 75 | # Batch Normalization 76 | elif "BatchNorm" in layer.type: 77 | params[layer.name] = {} 78 | params[layer.name]["running_mean"] = ( 79 | np.array(layer.blobs[0].data) / layer.blobs[2].data[0] 80 | ) 81 | params[layer.name]["running_var"] = ( 82 | np.array(layer.blobs[1].data) / layer.blobs[2].data[0] 83 | ) 84 | params[layer.name]["eps"] = layer.batch_norm_param.eps 85 | params[layer.name]["momentum"] = ( 86 | 1 - layer.batch_norm_param.moving_average_fraction 87 | ) 88 | params[layer.name]["num_batches_tracked"] = np.array(0) 89 | batch_norm_layer = layer.name 90 | # Scale 91 | elif "Scale" in layer.type: 92 | assert previous_layer_type == "BatchNorm" 93 | 
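            # Caffe stores batch norm as a "BatchNorm" layer (running statistics)
            # followed by a "Scale" layer (affine weight/bias); fold the Scale blobs
            # into the preceding BatchNorm entry so they map onto a single
            # nn.BatchNorm2d in PyTorch.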
params[batch_norm_layer]["weight"] = list(layer.blobs[0].data) 94 | params[batch_norm_layer]["bias"] = list(layer.blobs[1].data) 95 | elif "Pooling" in layer.type: 96 | params[layer.name] = {} 97 | params[layer.name]["kernel_size"] = layer.pooling_param.kernel_size 98 | params[layer.name]["stride"] = layer.pooling_param.stride 99 | params[layer.name]["padding"] = layer.pooling_param.pad 100 | 101 | previous_layer_type = layer.type 102 | 103 | return params 104 | 105 | 106 | # Hard coded translater 107 | def translate_layer_name(source, target="base"): 108 | def layer_block_branch(source, target): 109 | target += "layer{}".format(source[0][0]) 110 | if len(source[0][1:]) == 1: 111 | block = {"a": 1, "b": 2, "c": 3}.get(source[0][1:]) 112 | else: 113 | block = int(source[0][2:]) + 1 114 | target += ".block{}".format(block) 115 | branch = source[1][6:] 116 | if branch == "1": 117 | target += ".shortcut" 118 | elif branch == "2a": 119 | target += ".reduce" 120 | elif branch == "2b": 121 | target += ".conv3x3" 122 | elif branch == "2c": 123 | target += ".increase" 124 | return target 125 | 126 | source = source.split("_") 127 | 128 | if "pool" in source[0]: 129 | target += "layer1.pool" 130 | elif "fc" in source[0]: 131 | if len(source) == 3: 132 | stage = source[2] 133 | target += "aspp.{}".format(stage) 134 | else: 135 | target += "fc" 136 | elif "conv1" in source[0]: 137 | target += "layer1.conv1.conv" 138 | elif "conv1" in source[1]: 139 | target += "layer1.conv1.bn" 140 | elif "res" in source[0]: 141 | source[0] = source[0].replace("res", "") 142 | target = layer_block_branch(source, target) 143 | target += ".conv" 144 | elif "bn" in source[0]: 145 | source[0] = source[0].replace("bn", "") 146 | target = layer_block_branch(source, target) 147 | target += ".bn" 148 | 149 | return target 150 | 151 | 152 | @click.command() 153 | @click.option( 154 | "-d", 155 | "--dataset", 156 | type=click.Choice(["voc12", "coco"]), 157 | required=True, 158 | help="Caffemodel", 159 | ) 160 | def main(dataset): 161 | """ 162 | Convert caffemodels to pytorch models 163 | """ 164 | 165 | WHITELIST = ["kernel_size", "stride", "padding", "dilation", "eps", "momentum"] 166 | CONFIG = Dict( 167 | { 168 | "voc12": { 169 | # For loading the provided VOC 2012 caffemodel 170 | "PATH_CAFFE_MODEL": "data/models/voc12/deeplabv2_resnet101_msc/caffemodel/train2_iter_20000.caffemodel", 171 | "PATH_PYTORCH_MODEL": "data/models/voc12/deeplabv2_resnet101_msc/caffemodel/deeplabv2_resnet101_msc-vocaug.pth", 172 | "N_CLASSES": 21, 173 | "MODEL": "DeepLabV2_ResNet101_MSC", 174 | "HEAD": "base.", 175 | }, 176 | "coco": { 177 | # For loading the provided initial weights pre-trained on COCO 178 | "PATH_CAFFE_MODEL": "data/models/coco/deeplabv1_resnet101/caffemodel/init.caffemodel", 179 | "PATH_PYTORCH_MODEL": "data/models/coco/deeplabv1_resnet101/caffemodel/deeplabv1_resnet101-coco.pth", 180 | "N_CLASSES": 91, 181 | "MODEL": "DeepLabV1_ResNet101", 182 | "HEAD": "", 183 | }, 184 | }.get(dataset) 185 | ) 186 | 187 | params = parse_caffemodel(CONFIG.PATH_CAFFE_MODEL) 188 | 189 | model = eval(CONFIG.MODEL)(n_classes=CONFIG.N_CLASSES) 190 | model.eval() 191 | reference_state_dict = model.state_dict() 192 | 193 | rel_tol = 1e-7 194 | 195 | converted_state_dict = OrderedDict() 196 | for caffe_layer, caffe_layer_dict in params.items(): 197 | for param_name, caffe_values in caffe_layer_dict.items(): 198 | pytorch_layer = translate_layer_name(caffe_layer, CONFIG.HEAD) 199 | if pytorch_layer: 200 | pytorch_param = pytorch_layer + "." 
+ param_name 201 | 202 | # Parameter check 203 | if param_name in WHITELIST: 204 | pytorch_values = eval("model." + pytorch_param) 205 | if isinstance(pytorch_values, tuple): 206 | assert ( 207 | pytorch_values[0] == caffe_values 208 | ), "Inconsistent values: {} @{} (Caffe), {} @{} (PyTorch)".format( 209 | caffe_values, 210 | caffe_layer + "/" + param_name, 211 | pytorch_values, 212 | pytorch_param, 213 | ) 214 | else: 215 | assert ( 216 | abs(pytorch_values - caffe_values) < rel_tol 217 | ), "Inconsistent values: {} @{} (Caffe), {} @{} (PyTorch)".format( 218 | caffe_values, 219 | caffe_layer + "/" + param_name, 220 | pytorch_values, 221 | pytorch_param, 222 | ) 223 | print( 224 | "\033[34m[Passed!]\033[00m", 225 | (caffe_layer + "/" + param_name).ljust(35), 226 | "->", 227 | pytorch_param, 228 | ) 229 | continue 230 | 231 | # Weight conversion 232 | if pytorch_param in reference_state_dict: 233 | caffe_values = torch.tensor(caffe_values) 234 | caffe_values = caffe_values.view_as( 235 | reference_state_dict[pytorch_param] 236 | ) 237 | converted_state_dict[pytorch_param] = caffe_values 238 | print( 239 | "\033[32m[Copied!]\033[00m", 240 | (caffe_layer + "/" + param_name).ljust(35), 241 | "->", 242 | pytorch_param, 243 | ) 244 | 245 | print("\033[32mVerify the converted model\033[00m") 246 | model.load_state_dict(converted_state_dict) 247 | 248 | print('Saving to "{}"'.format(CONFIG.PATH_PYTORCH_MODEL)) 249 | torch.save(converted_state_dict, CONFIG.PATH_PYTORCH_MODEL) 250 | 251 | 252 | if __name__ == "__main__": 253 | main() 254 | -------------------------------------------------------------------------------- /utils/LoadData.py: -------------------------------------------------------------------------------- 1 | # from torchvision import transforms 2 | from .transforms import transforms 3 | from torch.utils.data import DataLoader 4 | import torchvision 5 | import torch 6 | import numpy as np 7 | from torch.utils.data import Dataset 8 | from .imutils import ResizeShort 9 | import os 10 | from PIL import Image 11 | import random 12 | 13 | def train_data_loader(args, test_path=False, segmentation=False): 14 | if 'coco' in args.dataset: 15 | mean_vals = [0.471, 0.448, 0.408] 16 | std_vals = [0.234, 0.239, 0.242] 17 | else: 18 | mean_vals = [0.485, 0.456, 0.406] 19 | std_vals = [0.229, 0.224, 0.225] 20 | 21 | input_size = int(args.input_size) 22 | crop_size = int(args.crop_size) 23 | tsfm_train = transforms.Compose([#transforms.Resize(input_size), 24 | ResizeShort(224), 25 | transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), 26 | transforms.ToTensor(), 27 | transforms.Normalize(mean_vals, std_vals), 28 | ]) 29 | 30 | tsfm_test = transforms.Compose([ResizeShort(224), 31 | transforms.ToTensor(), 32 | transforms.Normalize(mean_vals, std_vals), 33 | ]) 34 | 35 | img_train = VOCDataset(args.train_list, root_dir=args.img_dir, num_classes=args.num_classes, transform=tsfm_train, test=True) 36 | img_test = VOCDataset(args.test_list, root_dir=args.img_dir, num_classes=args.num_classes, transform=tsfm_test, test=True) 37 | 38 | train_loader = DataLoader(img_train, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 39 | val_loader = DataLoader(img_test, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 40 | 41 | return train_loader, val_loader 42 | 43 | def test_data_loader(args, test_path=False, segmentation=False): 44 | if 'coco' in args.dataset: 45 | mean_vals = [0.471, 0.448, 0.408] 46 | std_vals = [0.234, 0.239, 0.242] 47 | 
else: 48 | mean_vals = [0.485, 0.456, 0.406] 49 | std_vals = [0.229, 0.224, 0.225] 50 | 51 | input_size = int(args.input_size) 52 | 53 | tsfm_test = transforms.Compose([#transforms.Resize(input_size), 54 | ResizeShort(224), 55 | transforms.ToTensor(), 56 | transforms.Normalize(mean_vals, std_vals), 57 | ]) 58 | 59 | img_test = VOCDataset(args.test_list, root_dir=args.img_dir, num_classes=args.num_classes, transform=tsfm_test, test=True) 60 | val_loader = DataLoader(img_test, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 61 | 62 | return val_loader 63 | 64 | def test_msf_data_loader(args, test_path=False, segmentation=False): 65 | if 'coco' in args.dataset: 66 | mean_vals = [0.471, 0.448, 0.408] 67 | std_vals = [0.234, 0.239, 0.242] 68 | else: 69 | mean_vals = [0.485, 0.456, 0.406] 70 | std_vals = [0.229, 0.224, 0.225] 71 | 72 | 73 | input_size = int(args.input_size) 74 | crop_size = int(args.crop_size) 75 | tsfm_test = transforms.Compose([transforms.Resize(input_size), 76 | transforms.ToTensor(), 77 | transforms.Normalize(mean_vals, std_vals), 78 | ]) 79 | 80 | img_test = VOCDatasetMSF(args.test_list, root_dir=args.img_dir, num_classes=args.num_classes, scales=args.scales, transform=tsfm_test, test=True) 81 | val_loader = DataLoader(img_test, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 82 | 83 | return val_loader 84 | 85 | class VOCDataset(Dataset): 86 | def __init__(self, datalist_file, root_dir, num_classes=20, transform=None, test=False): 87 | self.root_dir = root_dir 88 | self.testing = test 89 | self.datalist_file = datalist_file 90 | self.transform = transform 91 | self.num_classes = num_classes 92 | self.image_list, self.label_list = self.read_labeled_image_list(self.root_dir, self.datalist_file) 93 | 94 | def __len__(self): 95 | return len(self.image_list) 96 | 97 | def __getitem__(self, idx): 98 | img_name = self.image_list[idx] 99 | image = Image.open(img_name).convert('RGB') 100 | 101 | if self.transform is not None: 102 | image = self.transform(image) 103 | if self.testing: 104 | return img_name, image, self.label_list[idx] 105 | 106 | return image, self.label_list[idx] 107 | 108 | def read_labeled_image_list(self, data_dir, data_list): 109 | with open(data_list, 'r') as f: 110 | lines = f.readlines() 111 | img_name_list = [] 112 | img_labels = [] 113 | for line in lines: 114 | fields = line.strip().split() 115 | image = fields[0] + '.jpg' 116 | labels = np.zeros((self.num_classes,), dtype=np.float32) 117 | for i in range(len(fields)-1): 118 | index = int(fields[i+1]) 119 | labels[index] = 1. 
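            # each list-file line is "<image_id> <class_idx> [<class_idx> ...]",
            # so `labels` becomes a multi-hot vector over num_classes categories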
120 | img_name_list.append(os.path.join(data_dir, image)) 121 | img_labels.append(labels) 122 | return img_name_list, img_labels#, np.array(img_labels, dtype=np.float32) 123 | 124 | ####integral attention model learning###### 125 | 126 | def train_data_loader_iam(args, test_path=False, segmentation=False): 127 | if 'coco' in args.dataset: 128 | mean_vals = [0.471, 0.448, 0.408] 129 | std_vals = [0.234, 0.239, 0.242] 130 | else: 131 | mean_vals = [0.485, 0.456, 0.406] 132 | std_vals = [0.229, 0.224, 0.225] 133 | 134 | input_size = int(args.input_size) 135 | crop_size = int(args.crop_size) 136 | tsfm_train = transforms.Compose([ResizeShort(224), 137 | transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), 138 | transforms.ToTensor(), 139 | transforms.Normalize(mean_vals, std_vals), 140 | ]) 141 | 142 | img_train = VOCDataset_iam(args.train_list, root_dir=args.img_dir, att_dir=args.att_dir, num_classes=args.num_classes, \ 143 | transform=tsfm_train, test=False) 144 | train_loader = DataLoader(img_train, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 145 | 146 | return train_loader 147 | 148 | class VOCDataset_iam(Dataset): 149 | def __init__(self, datalist_file, root_dir, att_dir, num_classes=20, transform=None, test=False): 150 | self.root_dir = root_dir 151 | self.att_dir = att_dir 152 | self.testing = test 153 | self.datalist_file = datalist_file 154 | self.transform = transform 155 | self.num_classes = num_classes 156 | self.image_list, self.label_list, self.label_name_list = \ 157 | self.read_labeled_image_list(self.root_dir, self.att_dir, self.datalist_file) 158 | 159 | def __len__(self): 160 | return len(self.image_list) 161 | 162 | def __getitem__(self, idx): 163 | img_name = self.image_list[idx] 164 | image = Image.open(img_name).convert('RGB') 165 | 166 | im_labels = self.label_list[idx] 167 | im_label_names = self.label_name_list[idx] 168 | tmp = Image.open(im_label_names[0]) 169 | h, w = tmp.size 170 | labels = np.zeros((self.num_classes, w, h), dtype=np.float32) 171 | 172 | for j in range(len(im_label_names)): 173 | label = im_labels[j] 174 | label_name = im_label_names[j] 175 | labels[label] = np.asarray(Image.open(label_name)) 176 | labels /= 255.0 177 | 178 | if self.transform is not None: 179 | image = self.transform(image) 180 | 181 | return image, labels 182 | 183 | def read_labeled_image_list(self, data_dir, att_dir, data_list): 184 | with open(data_list, 'r') as f: 185 | lines = f.readlines() 186 | 187 | img_name_list = [] 188 | label_list = [] 189 | label_name_list = [] 190 | 191 | for i, line in enumerate(lines): 192 | fields = line.strip().split() 193 | image = fields[0] + '.jpg' 194 | img_name_list.append(os.path.join(data_dir, image)) 195 | 196 | im_labels = [] 197 | im_label_names = [] 198 | 199 | for j in range(len(fields)-1): 200 | im_labels.append(int(fields[j+1])) 201 | index = '{}_{}.png'.format(i, fields[j+1]) 202 | im_label_names.append(os.path.join(att_dir, index)) 203 | 204 | label_list.append(im_labels) 205 | label_name_list.append(im_label_names) 206 | 207 | return img_name_list, label_list, label_name_list 208 | 209 | class VOCDatasetMSF(Dataset): 210 | def __init__(self, datalist_file, root_dir, num_classes=20, scales=[0.5, 1, 1.5, 2], transform=None, test=False): 211 | self.root_dir = root_dir 212 | self.testing = test 213 | self.datalist_file = datalist_file 214 | self.scales = scales 215 | self.transform = transform 216 | self.num_classes = num_classes 217 | self.image_list, self.label_list = 
self.read_labeled_image_list(self.root_dir, self.datalist_file) 218 | 219 | def __len__(self): 220 | return len(self.image_list) 221 | 222 | def __getitem__(self, idx): 223 | img_name = self.image_list[idx] 224 | image = Image.open(img_name).convert('RGB') 225 | 226 | ms_img_list = [] 227 | for s in self.scales: 228 | target_size = (int(round(image.size[0]*s)), 229 | int(round(image.size[1]*s))) 230 | s_img = image.resize(target_size, resample=Image.CUBIC) 231 | ms_img_list.append(s_img) 232 | 233 | if self.transform is not None: 234 | for i in range(len(ms_img_list)): 235 | ms_img_list[i] = self.transform(ms_img_list[i]) 236 | 237 | msf_img_list = [] 238 | for i in range(len(ms_img_list)): 239 | msf_img_list.append(ms_img_list[i]) 240 | msf_img_list.append(np.flip(ms_img_list[i], -1).copy()) 241 | 242 | if self.testing: 243 | return img_name, msf_img_list, self.label_list[idx] 244 | 245 | return msf_img_list, self.label_list[idx] 246 | 247 | def read_labeled_image_list(self, data_dir, data_list): 248 | with open(data_list, 'r') as f: 249 | lines = f.readlines() 250 | img_name_list = [] 251 | img_labels = [] 252 | for line in lines: 253 | fields = line.strip().split() 254 | image = fields[0] + '.jpg' 255 | labels = np.zeros((self.num_classes,), dtype=np.float32) 256 | for i in range(len(fields)-1): 257 | index = int(fields[i+1]) 258 | labels[index] = 1. 259 | img_name_list.append(os.path.join(data_dir, image)) 260 | img_labels.append(labels) 261 | return img_name_list, img_labels #np.array(img_labels, dtype=np.float32) 262 | -------------------------------------------------------------------------------- /deeplab-pytorch/README.md: -------------------------------------------------------------------------------- 1 | # DeepLab with PyTorch 2 | 3 | This is an unofficial **PyTorch** implementation of **DeepLab v2** [[1](##references)] with a **ResNet-101** backbone. 4 | * **COCO-Stuff** dataset [[2](##references)] and **PASCAL VOC** dataset [[3]()] are supported. 5 | * The official Caffe weights provided by the authors can be used without building the Caffe APIs. 6 | * DeepLab v3/v3+ models with the identical backbone are also included (not tested). 7 | * [```torch.hub``` is supported](#torchhub). 8 | 9 | ## Performance 10 | 11 | ### COCO-Stuff 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 |
| Train set | Eval set | Code | Weight | CRF? | Pixel Accuracy | Mean Accuracy | Mean IoU | FreqW IoU |
| :--- | :--- | :--- | :--- | :---: | ---: | ---: | ---: | ---: |
| 10k *train* † | 10k *val* | Official [2] | | | 65.1 | 45.5 | 34.4 | 50.4 |
| 10k *train* † | 10k *val* | This repo | Download | | 65.8 | 45.7 | 34.8 | 51.2 |
| 10k *train* † | 10k *val* | This repo | Download | ✓ | 67.1 | 46.4 | 35.6 | 52.5 |
| 164k *train* | 164k *val* | This repo | Download | | 66.8 | 51.2 | 39.1 | 51.5 |
| 164k *train* | 164k *val* | This repo | Download | ✓ | 67.6 | 51.5 | 39.7 | 52.3 |
† Images and labels are pre-warped to a square 513x513 shape.
‡ Note for [SPADE](https://nvlabs.github.io/SPADE/) followers: The provided COCO-Stuff 164k weight has been kept intact since 2019/02/23.

### PASCAL VOC 2012
| Train set | Eval set | Code | Weight | CRF? | Pixel Accuracy | Mean Accuracy | Mean IoU | FreqW IoU |
| :--- | :--- | :--- | :--- | :---: | ---: | ---: | ---: | ---: |
| *trainaug* | *val* | Official [3] | | | - | - | 76.35 | - |
| *trainaug* | *val* | Official [3] | | ✓ | - | - | 77.69 | - |
| *trainaug* | *val* | This repo | Download | | 94.64 | 86.50 | 76.65 | 90.41 |
| *trainaug* | *val* | This repo | Download | ✓ | 95.04 | 86.64 | 77.93 | 91.06 |
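Pixel Accuracy, Mean Accuracy, Mean IoU, and FreqW IoU above are the standard semantic-segmentation scores read off a class-by-class confusion matrix accumulated over the whole evaluation set. The snippet below is a minimal NumPy sketch of how these four numbers are typically computed; the function name and array layout are illustrative and are not this repository's API.

```python
import numpy as np

def segmentation_scores(conf):
    """Derive the four reported scores from an (n_classes, n_classes)
    confusion matrix where conf[gt, pred] counts pixels."""
    conf = conf.astype(np.float64)
    tp = np.diag(conf)                # correctly classified pixels per class
    gt_total = conf.sum(axis=1)       # ground-truth pixels per class
    pred_total = conf.sum(axis=0)     # predicted pixels per class
    with np.errstate(divide="ignore", invalid="ignore"):
        pixel_acc = tp.sum() / conf.sum()
        mean_acc = np.nanmean(tp / gt_total)            # Mean Accuracy
        iou = tp / (gt_total + pred_total - tp)
        mean_iou = np.nanmean(iou)                      # Mean IoU
        freq = gt_total / conf.sum()                    # class frequency
        freqw_iou = np.nansum(freq * iou)               # FreqW IoU
    return pixel_acc, mean_acc, mean_iou, freqw_iou
```

Classes that never appear in the ground truth produce NaN entries and are simply skipped by the `nanmean`/`nansum` reductions.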
130 | 131 | ## Setup 132 | 133 | ### Requirements 134 | 135 | Required Python packages are listed in the Anaconda configuration file `configs/conda_env.yaml`. 136 | Please modify the listed `cudatoolkit=10.2` and `python=3.6` as needed and run the following commands. 137 | 138 | ```sh 139 | # Set up with Anaconda 140 | conda env create -f configs/conda_env.yaml 141 | conda activate deeplab-pytorch 142 | ``` 143 | 144 | ### Download datasets 145 | 146 | * [COCO-Stuff 10k/164k](data/datasets/cocostuff/README.md) 147 | * [PASCAL VOC 2012](data/datasets/voc12/README.md) 148 | 149 | ### Download pre-trained caffemodels 150 | 151 | Caffemodels pre-trained on COCO and PASCAL VOC datasets are released by the DeepLab authors. 152 | In accordance with the papers [[1](##references),[2](##references)], this repository uses the COCO-trained parameters as initial weights. 153 | 154 | 1. Run the following script to download the pre-trained caffemodels (1GB+). 155 | 156 | ```sh 157 | $ bash scripts/setup_caffemodels.sh 158 | ``` 159 | 160 | 2. Convert the caffemodels into PyTorch-compatible weights. No need to build the Caffe API! 161 | 162 | ```sh 163 | # Generate "deeplabv1_resnet101-coco.pth" from "init.caffemodel" 164 | $ python convert.py --dataset coco 165 | # Generate "deeplabv2_resnet101_msc-vocaug.pth" from "train2_iter_20000.caffemodel" 166 | $ python convert.py --dataset voc12 167 | ``` 168 | 169 | ## Training & Evaluation 170 | 171 | To train DeepLab v2 on PASCAL VOC 2012: 172 | 173 | ```sh 174 | python main.py train \ 175 | --config-path configs/voc12.yaml 176 | ``` 177 | 178 | To evaluate the performance on a validation set: 179 | 180 | ```sh 181 | python main.py test \ 182 | --config-path configs/voc12.yaml \ 183 | --model-path data/models/voc12/deeplabv2_resnet101_msc/train_aug/checkpoint_final.pth 184 | ``` 185 | 186 | Note: This command saves the predicted logit maps (`.npy`) and the scores (`.json`). 187 | 188 | To re-evaluate with CRF post-processing:
189 | 190 | ```sh 191 | python main.py crf \ 192 | --config-path configs/voc12.yaml 193 | ``` 194 | 195 | Execution of a series of the above scripts is equivalent to `bash scripts/train_eval.sh`. 196 | 197 | To monitor a loss, run the following command in a separate terminal. 198 | 199 | ```sh 200 | tensorboard --logdir data/logs 201 | ``` 202 | 203 | Please specify the appropriate configuration files for the other datasets. 204 | 205 | | Dataset | Config file | #Iterations | Classes | 206 | | :-------------- | :--------------------------- | :---------- | :--------------------------- | 207 | | PASCAL VOC 2012 | `configs/voc12.yaml` | 20,000 | 20 foreground + 1 background | 208 | | COCO-Stuff 10k | `configs/cocostuff10k.yaml` | 20,000 | 182 thing/stuff | 209 | | COCO-Stuff 164k | `configs/cocostuff164k.yaml` | 100,000 | 182 thing/stuff | 210 | 211 | Note: Although the label indices range from 0 to 181 in COCO-Stuff 10k/164k, only [171 classes](https://github.com/nightrome/cocostuff/blob/master/labels.md) are supervised. 212 | 213 | Common settings: 214 | 215 | - **Model**: DeepLab v2 with ResNet-101 backbone. Dilated rates of ASPP are (6, 12, 18, 24). Output stride is 8. 216 | - **GPU**: All the GPUs visible to the process are used. Please specify the scope with 217 | ```CUDA_VISIBLE_DEVICES=```. 218 | - **Multi-scale loss**: Loss is defined as a sum of responses from multi-scale inputs (1x, 0.75x, 0.5x) and element-wise max across the scales. The *unlabeled* class is ignored in the loss computation. 219 | - **Gradient accumulation**: The mini-batch of 10 samples is not processed at once due to the high occupancy of GPU memories. Instead, gradients of small batches of 5 samples are accumulated for 2 iterations, and weight updating is performed at the end (```batch_size * iter_size = 10```). GPU memory usage is approx. 11.2 GB with the default setting (tested on the single Titan X). You can reduce it with a small ```batch_size```. 220 | - **Learning rate**: Stochastic gradient descent (SGD) is used with momentum of 0.9 and initial learning rate of 2.5e-4. Polynomial learning rate decay is employed; the learning rate is multiplied by ```(1-iter/iter_max)**power``` at every 10 iterations. 221 | - **Monitoring**: Moving average loss (```average_loss``` in Caffe) can be monitored in TensorBoard. 222 | - **Preprocessing**: Input images are randomly re-scaled by factors ranging from 0.5 to 1.5, padded if needed, and randomly cropped to 321x321. 223 | 224 | Processed images and labels in COCO-Stuff 164k: 225 | 226 | ![Data](docs/datasets/cocostuff.png) 227 | 228 | ## Inference Demo 229 | 230 | You can use [the pre-trained models](#performance), [the converted models](#download-pre-trained-caffemodels), or your models. 231 | 232 | To process a single image: 233 | 234 | ```bash 235 | python demo.py single \ 236 | --config-path configs/voc12.yaml \ 237 | --model-path deeplabv2_resnet101_msc-vocaug-20000.pth \ 238 | --image-path image.jpg 239 | ``` 240 | 241 | To run on a webcam: 242 | 243 | ```bash 244 | python demo.py live \ 245 | --config-path configs/voc12.yaml \ 246 | --model-path deeplabv2_resnet101_msc-vocaug-20000.pth 247 | ``` 248 | 249 | To run a CRF post-processing, add `--crf`. To run on a CPU, add `--cpu`. 
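For reference, the polynomial learning-rate decay described under *Common settings* above (LR multiplied by `(1-iter/iter_max)**power`, updated every 10 iterations) can be expressed as an ordinary PyTorch scheduler. The sketch below is a minimal illustration of that schedule; the class name and constructor arguments are assumptions for this example and not necessarily how this codebase implements it.

```python
from torch.optim.lr_scheduler import _LRScheduler

class PolynomialLR(_LRScheduler):
    """Decay each base LR by (1 - iter / iter_max) ** power,
    refreshing the value only every `step_size` iterations."""

    def __init__(self, optimizer, step_size, iter_max, power, last_epoch=-1):
        self.step_size = step_size
        self.iter_max = iter_max
        self.power = power
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        # Between updates (and past iter_max) keep the current LR unchanged.
        if self.last_epoch % self.step_size or self.last_epoch > self.iter_max:
            return [group["lr"] for group in self.optimizer.param_groups]
        factor = (1 - self.last_epoch / self.iter_max) ** self.power
        return [base_lr * factor for base_lr in self.base_lrs]
```

With `step_size=10`, `iter_max=20000`, `power=0.9`, and SGD at a base LR of 2.5e-4 with momentum 0.9, calling `scheduler.step()` once per iteration matches the schedule described above: the learning rate decays smoothly from 2.5e-4 toward zero over the 20,000 iterations.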
250 | 251 | ## Misc 252 | 253 | ### torch.hub 254 | 255 | Model setup with two lines 256 | 257 | ```python 258 | import torch.hub 259 | model = torch.hub.load("kazuto1011/deeplab-pytorch", "deeplabv2_resnet101", pretrained='cocostuff164k', n_classes=182) 260 | ``` 261 | 262 | ### Difference with Caffe version 263 | 264 | * While the official code employs 1/16 bilinear interpolation (```Interp``` layer) for downsampling a label for only 0.5x input, this codebase does for both 0.5x and 0.75x inputs with nearest interpolation (```PIL.Image.resize```, [related issue](https://github.com/kazuto1011/deeplab-pytorch/issues/51)). 265 | * Bilinear interpolation on images and logits is performed with the ```align_corners=False```. 266 | 267 | ### Training batch normalization 268 | 269 | 270 | This codebase only supports DeepLab v2 training which freezes batch normalization layers, although 271 | v3/v3+ protocols require training them. If training their parameters on multiple GPUs as well in your projects, please 272 | install [the extra library](https://hangzhang.org/PyTorch-Encoding/) below. 273 | 274 | ```bash 275 | pip install torch-encoding 276 | ``` 277 | 278 | Batch normalization layers in a model are automatically switched in ```libs/models/resnet.py```. 279 | 280 | ```python 281 | try: 282 | from encoding.nn import SyncBatchNorm 283 | _BATCH_NORM = SyncBatchNorm 284 | except: 285 | _BATCH_NORM = nn.BatchNorm2d 286 | ``` 287 | 288 | ## References 289 | 290 | 1. L.-C. Chen, G. Papandreou, I. Kokkinos, K. Murphy, A. L. Yuille. DeepLab: Semantic Image 291 | Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs. *IEEE TPAMI*, 292 | 2018.
293 | [Project](http://liangchiehchen.com/projects/DeepLab.html) / 294 | [Code](https://bitbucket.org/aquariusjay/deeplab-public-ver2) / [arXiv 295 | paper](https://arxiv.org/abs/1606.00915) 296 | 297 | 2. H. Caesar, J. Uijlings, V. Ferrari. COCO-Stuff: Thing and Stuff Classes in Context. In *CVPR*, 2018.
298 | [Project](https://github.com/nightrome/cocostuff) / [arXiv paper](https://arxiv.org/abs/1612.03716) 299 | 300 | 1. M. Everingham, L. Van Gool, C. K. I. Williams, J. Winn, A. Zisserman. The PASCAL Visual Object 301 | Classes (VOC) Challenge. *IJCV*, 2010.
302 | [Project](http://host.robots.ox.ac.uk/pascal/VOC) / 303 | [Paper](http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham10.pdf) 304 | -------------------------------------------------------------------------------- /utils/datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import numpy as np 4 | import random 5 | import collections 6 | import torch 7 | import torchvision 8 | import cv2 9 | from torch.utils import data 10 | 11 | 12 | class VOCDataSet(data.Dataset): 13 | def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321), mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255): 14 | self.root = root 15 | self.list_path = list_path 16 | self.crop_h, self.crop_w = crop_size 17 | self.scale = scale 18 | self.ignore_label = ignore_label 19 | self.mean = mean 20 | self.is_mirror = mirror 21 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 22 | self.img_ids = [i_id.strip() for i_id in open(list_path)] 23 | #if not max_iters==None: 24 | # self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids))) 25 | self.files = [] 26 | # for split in ["train", "trainval", "val"]: 27 | for name in self.img_ids: 28 | #img_file = osp.join(self.root, "JPEGImages/%s.jpg" % name) 29 | #label_file = osp.join(self.root, "SegmentationClassAug/%s.png" % name) 30 | #img_file = osp.join(self.root, "trainval_image/%s.jpg" % name) 31 | #label_file = osp.join(self.root, "trainval_gt/%s.png" % name) 32 | self.files.append({ 33 | "img": img_file, 34 | "label": label_file, 35 | "name": name 36 | }) 37 | 38 | def __len__(self): 39 | return len(self.files) 40 | 41 | def generate_scale_label(self, image, label): 42 | f_scale = 0.5 + random.randint(0, 11) / 10.0 43 | image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_LINEAR) 44 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_NEAREST) 45 | return image, label 46 | 47 | def __getitem__(self, index): 48 | datafiles = self.files[index] 49 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 50 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 51 | size = image.shape 52 | name = datafiles["name"] 53 | if self.scale: 54 | image, label = self.generate_scale_label(image, label) 55 | image = np.asarray(image, np.float32) 56 | image -= self.mean 57 | img_h, img_w = label.shape 58 | pad_h = max(self.crop_h - img_h, 0) 59 | pad_w = max(self.crop_w - img_w, 0) 60 | if pad_h > 0 or pad_w > 0: 61 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 62 | pad_w, cv2.BORDER_CONSTANT, 63 | value=(0.0, 0.0, 0.0)) 64 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 65 | pad_w, cv2.BORDER_CONSTANT, 66 | value=(self.ignore_label,)) 67 | else: 68 | img_pad, label_pad = image, label 69 | 70 | img_h, img_w = label_pad.shape 71 | h_off = random.randint(0, img_h - self.crop_h) 72 | w_off = random.randint(0, img_w - self.crop_w) 73 | # roi = cv2.Rect(w_off, h_off, self.crop_w, self.crop_h); 74 | image = np.asarray(img_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 75 | label = np.asarray(label_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 76 | #image = image[:, :, ::-1] # change to BGR 77 | image = image.transpose((2, 0, 1)) 78 | if self.is_mirror: 79 | flip = np.random.choice(2) * 2 - 1 80 | image = image[:, :, ::flip] 81 | label = label[:, ::flip] 82 | 83 | return image.copy(), label.copy(), np.array(size), name 
84 | 85 | 86 | class VOCDataTestSet(data.Dataset): 87 | def __init__(self, root, list_path, crop_size=(505, 505), mean=(128, 128, 128)): 88 | self.root = root 89 | self.list_path = list_path 90 | self.crop_h, self.crop_w = crop_size 91 | self.mean = mean 92 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 93 | self.img_ids = [i_id.strip() for i_id in open(list_path)] 94 | self.files = [] 95 | # for split in ["train", "trainval", "val"]: 96 | for name in self.img_ids: 97 | img_file = osp.join(self.root, "JPEGImages/%s.jpg" % name) 98 | label_file = osp.join(self.root, "SegmentationClassAug/%s.png" % name) 99 | self.files.append({ 100 | "img": img_file, 101 | "label": label_file 102 | }) 103 | 104 | def __len__(self): 105 | return len(self.files) 106 | 107 | def __getitem__(self, index): 108 | datafiles = self.files[index] 109 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 110 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 111 | name = osp.splitext(osp.basename(datafiles["img"]))[0] 112 | image = np.asarray(image, np.float32) 113 | 114 | img_h, img_w, _ = image.shape 115 | #max_size = max(img_h, img_w) 116 | #ratio = float(self.crop_h) / float(max_size) 117 | #new_h = int(ratio * img_h) 118 | #new_w = int(ratio * img_w) 119 | #image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC) 120 | size = image.shape 121 | 122 | 123 | image -= self.mean 124 | pad_h = max(self.crop_h - img_h, 0) 125 | pad_w = max(self.crop_w - img_w, 0) 126 | if pad_h > 0 or pad_w > 0: 127 | image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT, value=(0.0, 0.0, 0.0)) 128 | image = image.transpose((2, 0, 1)) 129 | return image, label, np.array(size), name 130 | 131 | class CSDataSet(data.Dataset): 132 | def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321), mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255): 133 | self.root = root 134 | self.list_path = list_path 135 | self.crop_h, self.crop_w = crop_size 136 | self.scale = scale 137 | self.ignore_label = ignore_label 138 | self.mean = mean 139 | self.is_mirror = mirror 140 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 141 | self.img_ids = [i_id.strip().split() for i_id in open(list_path)] 142 | if not max_iters==None: 143 | self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids))) 144 | self.files = [] 145 | # for split in ["train", "trainval", "val"]: 146 | for item in self.img_ids: 147 | image_path, label_path = item 148 | name = osp.splitext(osp.basename(label_path))[0] 149 | img_file = osp.join(self.root, image_path) 150 | label_file = osp.join(self.root, label_path) 151 | self.files.append({ 152 | "img": img_file, 153 | "label": label_file, 154 | "name": name 155 | }) 156 | self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label, 157 | 3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label, 158 | 7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4, 159 | 14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5, 160 | 18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, 161 | 28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18} 162 | print('{} images are loaded!'.format(len(self.img_ids))) 163 | 164 | def __len__(self): 165 | return len(self.files) 166 | 167 | def generate_scale_label(self, image, label): 168 | f_scale = 0.7 + random.randint(0, 14) / 10.0 169 | image = cv2.resize(image, 
None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_LINEAR) 170 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_NEAREST) 171 | return image, label 172 | 173 | def id2trainId(self, label, reverse=False): 174 | label_copy = label.copy() 175 | if reverse: 176 | for v, k in self.id_to_trainid.items(): 177 | label_copy[label == k] = v 178 | else: 179 | for k, v in self.id_to_trainid.items(): 180 | label_copy[label == k] = v 181 | return label_copy 182 | 183 | def __getitem__(self, index): 184 | datafiles = self.files[index] 185 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 186 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 187 | label = self.id2trainId(label) 188 | size = image.shape 189 | name = datafiles["name"] 190 | if self.scale: 191 | image, label = self.generate_scale_label(image, label) 192 | image = np.asarray(image, np.float32) 193 | image -= self.mean 194 | img_h, img_w = label.shape 195 | pad_h = max(self.crop_h - img_h, 0) 196 | pad_w = max(self.crop_w - img_w, 0) 197 | if pad_h > 0 or pad_w > 0: 198 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 199 | pad_w, cv2.BORDER_CONSTANT, 200 | value=(0.0, 0.0, 0.0)) 201 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 202 | pad_w, cv2.BORDER_CONSTANT, 203 | value=(self.ignore_label,)) 204 | else: 205 | img_pad, label_pad = image, label 206 | 207 | img_h, img_w = label_pad.shape 208 | h_off = random.randint(0, img_h - self.crop_h) 209 | w_off = random.randint(0, img_w - self.crop_w) 210 | # roi = cv2.Rect(w_off, h_off, self.crop_w, self.crop_h); 211 | image = np.asarray(img_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 212 | label = np.asarray(label_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 213 | #image = image[:, :, ::-1] # change to BGR 214 | image = image.transpose((2, 0, 1)) 215 | if self.is_mirror: 216 | flip = np.random.choice(2) * 2 - 1 217 | image = image[:, :, ::flip] 218 | label = label[:, ::flip] 219 | 220 | return image.copy(), label.copy(), np.array(size), name 221 | 222 | 223 | class CSDataTestSet(data.Dataset): 224 | def __init__(self, root, list_path, crop_size=(505, 505), mean=(128, 128, 128)): 225 | self.root = root 226 | self.list_path = list_path 227 | self.crop_h, self.crop_w = crop_size 228 | self.mean = mean 229 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 230 | self.img_ids = [i_id.strip().split() for i_id in open(list_path)] 231 | self.files = [] 232 | # for split in ["train", "trainval", "val"]: 233 | for item in self.img_ids: 234 | image_path, label_path = item 235 | name = osp.splitext(osp.basename(label_path))[0] 236 | img_file = osp.join(self.root, image_path) 237 | self.files.append({ 238 | "img": img_file 239 | }) 240 | 241 | def __len__(self): 242 | return len(self.files) 243 | 244 | def __getitem__(self, index): 245 | datafiles = self.files[index] 246 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 247 | size = image.shape 248 | name = osp.splitext(osp.basename(datafiles["img"]))[0] 249 | image = np.asarray(image, np.float32) 250 | image -= self.mean 251 | 252 | img_h, img_w, _ = image.shape 253 | pad_h = max(self.crop_h - img_h, 0) 254 | pad_w = max(self.crop_w - img_w, 0) 255 | if pad_h > 0 or pad_w > 0: 256 | image = cv2.copyMakeBorder(image, 0, pad_h, 0, 257 | pad_w, cv2.BORDER_CONSTANT, 258 | value=(0.0, 0.0, 0.0)) 259 | image = image.transpose((2, 0, 1)) 260 | return image, name, size 261 | 262 | class CSDataTestSet(data.Dataset): 263 | 
def __init__(self, root, list_path, crop_size=(505, 505)): 264 | self.root = root 265 | self.list_path = list_path 266 | self.crop_h, self.crop_w = crop_size 267 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 268 | self.img_ids = [i_id.strip().split()[0] for i_id in open(list_path)] 269 | self.files = [] 270 | # for split in ["train", "trainval", "val"]: 271 | for image_path in self.img_ids: 272 | name = osp.splitext(osp.basename(image_path))[0] 273 | img_file = osp.join(self.root, image_path) 274 | self.files.append({ 275 | "img": img_file 276 | }) 277 | 278 | def __len__(self): 279 | return len(self.files) 280 | 281 | def __getitem__(self, index): 282 | datafiles = self.files[index] 283 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 284 | image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR) 285 | size = image.shape 286 | name = osp.splitext(osp.basename(datafiles["img"]))[0] 287 | image = np.asarray(image, np.float32) 288 | image = (image - image.min()) / (image.max() - image.min()) 289 | 290 | img_h, img_w, _ = image.shape 291 | pad_h = max(self.crop_h - img_h, 0) 292 | pad_w = max(self.crop_w - img_w, 0) 293 | if pad_h > 0 or pad_w > 0: 294 | image = cv2.copyMakeBorder(image, 0, pad_h, 0, 295 | pad_w, cv2.BORDER_CONSTANT, 296 | value=(0.0, 0.0, 0.0)) 297 | image = image.transpose((2, 0, 1)) 298 | return image, np.array(size), name 299 | 300 | if __name__ == '__main__': 301 | import matplotlib.pyplot as plt  # only needed for this smoke test 302 | # NOTE: "./data/train.txt" is a placeholder image-list path for illustration, not a file shipped with the repo 303 | dst = VOCDataSet("./data", "./data/train.txt") 304 | trainloader = data.DataLoader(dst, batch_size=4) 305 | for i, batch in enumerate(trainloader):  # renamed from `data` to avoid shadowing the torch.utils.data import 306 | imgs, labels, _, _ = batch  # __getitem__ also returns the image size and name 307 | if i == 0: 308 | img = torchvision.utils.make_grid(imgs).numpy() 309 | img = np.transpose(img, (1, 2, 0)) 310 | img = img[:, :, ::-1]  # BGR -> RGB for display 311 | plt.imshow(img) 312 | plt.show() 313 | --------------------------------------------------------------------------------