├── deeplab-pytorch ├── libs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── caffe_pb2.cpython-36.pyc │ ├── models │ │ ├── __pycache__ │ │ │ ├── msc.cpython-36.pyc │ │ │ ├── resnet.cpython-36.pyc │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── deeplabv1.cpython-36.pyc │ │ │ ├── deeplabv2.cpython-36.pyc │ │ │ ├── deeplabv3.cpython-36.pyc │ │ │ └── deeplabv3plus.cpython-36.pyc │ │ ├── msc.py │ │ ├── deeplabv1.py │ │ ├── deeplabv2.py │ │ ├── __init__.py │ │ ├── deeplabv3plus.py │ │ ├── deeplabv3.py │ │ └── resnet.py │ ├── utils │ │ ├── __pycache__ │ │ │ ├── crf.cpython-36.pyc │ │ │ ├── metric.cpython-36.pyc │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── lr_scheduler.cpython-36.pyc │ │ ├── __init__.py │ │ ├── lr_scheduler.py │ │ ├── crf.py │ │ └── metric.py │ └── datasets │ │ ├── __pycache__ │ │ ├── base.cpython-36.pyc │ │ ├── voc.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ └── cocostuff.cpython-36.pyc │ │ ├── __init__.py │ │ ├── base.py │ │ ├── voc.py │ │ └── cocostuff.py ├── data │ ├── models │ │ ├── coco │ │ │ └── deeplabv1_resnet101 │ │ │ │ └── caffemodel │ │ │ │ └── .gitkeep │ │ └── voc12 │ │ │ └── deeplabv2_resnet101_msc │ │ │ └── caffemodel │ │ │ └── .gitkeep │ └── datasets │ │ ├── voc12 │ │ ├── labels.txt │ │ └── README.md │ │ ├── coco │ │ └── labels.txt │ │ └── cocostuff │ │ ├── README.md │ │ ├── labels.txt │ │ └── cocostuff_hierarchy.yaml ├── scripts │ ├── setup_voc12.sh │ ├── setup_cocostuff10k.sh │ ├── setup_caffemodels.sh │ ├── setup_cocostuff164k.sh │ └── train_eval.sh ├── configs │ ├── conda_env.yaml │ ├── coco.yaml │ ├── voc12.yaml │ ├── cocostuff10k.yaml │ └── cocostuff164k.yaml ├── LICENSE ├── hubconf.py ├── demo.py ├── convert.py └── README.md ├── utils ├── __init__.py ├── transforms │ ├── __init__.py │ ├── __init__.pyc │ ├── functional.pyc │ ├── transforms.pyc │ └── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── functional.cpython-36.pyc │ │ └── transforms.cpython-36.pyc ├── avgMeter.py ├── Metrics.py ├── pyutils.py ├── Restore.py ├── torchutils.py ├── imutils.py ├── LoadData.py └── datasets.py ├── scripts ├── __pycache__ │ └── my_optim.cpython-36.pyc ├── test_iam.py ├── test.py ├── my_optim.py ├── train_iam.py └── train.py ├── test.sh ├── test_iam.sh ├── train.sh ├── train_iam.sh ├── train+.sh ├── res.py ├── runs └── exp1 │ └── res.py ├── models ├── vgg1.py └── vgg.py ├── README.md └── gen_gt.py /deeplab-pytorch/libs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .avgMeter import * 2 | -------------------------------------------------------------------------------- /utils/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import * -------------------------------------------------------------------------------- /deeplab-pytorch/data/models/coco/deeplabv1_resnet101/caffemodel/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/models/voc12/deeplabv2_resnet101_msc/caffemodel/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/transforms/__init__.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/__init__.pyc -------------------------------------------------------------------------------- /utils/transforms/functional.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/functional.pyc -------------------------------------------------------------------------------- /utils/transforms/transforms.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/transforms.pyc -------------------------------------------------------------------------------- /scripts/__pycache__/my_optim.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/scripts/__pycache__/my_optim.cpython-36.pyc -------------------------------------------------------------------------------- /utils/transforms/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/transforms/__pycache__/functional.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/__pycache__/functional.cpython-36.pyc -------------------------------------------------------------------------------- /utils/transforms/__pycache__/transforms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/utils/transforms/__pycache__/transforms.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/__pycache__/caffe_pb2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/__pycache__/caffe_pb2.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/msc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/msc.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__pycache__/crf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/utils/__pycache__/crf.cpython-36.pyc -------------------------------------------------------------------------------- 
/deeplab-pytorch/libs/datasets/__pycache__/base.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/datasets/__pycache__/base.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/__pycache__/voc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/datasets/__pycache__/voc.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/resnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/resnet.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__pycache__/metric.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/utils/__pycache__/metric.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/deeplabv1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/deeplabv1.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/deeplabv2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/deeplabv2.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/deeplabv3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/deeplabv3.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .crf import DenseCRF 3 | from .lr_scheduler import PolynomialLR 4 | from .metric import scores 5 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- 
/deeplab-pytorch/libs/datasets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/datasets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/__pycache__/cocostuff.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/datasets/__pycache__/cocostuff.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/__pycache__/lr_scheduler.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/utils/__pycache__/lr_scheduler.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__pycache__/deeplabv3plus.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PengtaoJiang/OAA-PyTorch/HEAD/deeplab-pytorch/libs/models/__pycache__/deeplabv3plus.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/setup_voc12.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DATASET_DIR=$1 4 | 5 | # Download PASCAL VOC12 (2GB) 6 | wget -nc -P $DATASET_DIR http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 7 | 8 | # Extract images, annotations, etc. 
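# Extraction yields $DATASET_DIR/VOCdevkit/VOC2012/{Annotations,ImageSets,JPEGImages,...};
# that VOC2012 directory is the path to set as DATASET ROOT in configs/voc12.yaml
# (see data/datasets/voc12/README.md).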
9 | tar -xvf $DATASET_DIR/VOCtrainval_11-May-2012.tar -C $DATASET_DIR -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/voc12/labels.txt: -------------------------------------------------------------------------------- 1 | 0 __background__ 2 | 1 aeroplane 3 | 2 bicycle 4 | 3 bird 5 | 4 boat 6 | 5 bottle 7 | 6 bus 8 | 7 car 9 | 8 cat 10 | 9 chair 11 | 10 cow 12 | 11 diningtable 13 | 12 dog 14 | 13 horse 15 | 14 motorbike 16 | 15 person 17 | 16 pottedplant 18 | 17 sheep 19 | 18 sofa 20 | 19 train 21 | 20 tvmonitor -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc import VOC, VOCAug 2 | from .cocostuff import CocoStuff10k, CocoStuff164k 3 | 4 | 5 | def get_dataset(name): 6 | return { 7 | "cocostuff10k": CocoStuff10k, 8 | "cocostuff164k": CocoStuff164k, 9 | "voc": VOC, 10 | "vocaug": VOCAug, 11 | }[name] 12 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/conda_env.yaml: -------------------------------------------------------------------------------- 1 | name: deeplab-pytorch 2 | dependencies: 3 | - click 4 | - conda-forge::pydensecrf 5 | - cudatoolkit=10.2 6 | - matplotlib 7 | - python=3.6 8 | - pytorch::pytorch>1.2.0 9 | - pytorch::torchvision 10 | - pyyaml 11 | - scipy 12 | - tqdm 13 | - pip: 14 | - addict 15 | - black 16 | - joblib 17 | - omegaconf 18 | - opencv-python 19 | - tensorflow 20 | - torchnet 21 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | EXP=exp1 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/test.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --test_list=./data/voc12/train_cls.txt \ 7 | --arch=vgg \ 8 | --batch_size=1 \ 9 | --dataset=pascal_voc \ 10 | --input_size=224 \ 11 | --num_classes=20 \ 12 | --restore_from=./runs/${EXP}/model/pascal_voc_epoch_14.pth \ 13 | --save_dir=./runs/${EXP}/attention/ \ 14 | -------------------------------------------------------------------------------- /test_iam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | EXP=exp2 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/test_iam.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --test_list=./data/voc12/train_cls.txt \ 7 | --arch=vgg1 \ 8 | --batch_size=1 \ 9 | --dataset=pascal_voc \ 10 | --input_size=224 \ 11 | --num_classes=20 \ 12 | --restore_from=./runs/${EXP}/model/pascal_voc_epoch_14.pth \ 13 | --save_dir=./runs/${EXP}/attention/ \ 14 | -------------------------------------------------------------------------------- /utils/avgMeter.py: -------------------------------------------------------------------------------- 1 | class AverageMeter(object): 2 | """Computes and stores the average and current value""" 3 | def __init__(self): 4 | self.reset() 5 | 6 | def reset(self): 7 | self.val = 0 8 | self.avg = 0 9 | self.sum = 0 10 | self.count = 0 11 | 12 | def update(self, val, n=1): 13 | self.val = val 14 | self.sum += val * n 15 | self.count += n 16 | self.avg = self.sum / self.count 17 | 18 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 
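# Stage-1 classification training with online attention accumulation (OAA):
# scripts/train.py learns a 20-class classifier from VOC12 image-level labels
# (train_cls.txt) and writes the accumulated class attention maps to --accu_dir,
# which train_iam.sh (via --att_dir) and res.py read afterwards.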
EXP=exp1 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/train.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --train_list=./data/voc12/train_cls.txt \ 7 | --test_list=./data/voc12/val_cls.txt \ 8 | --epoch=15 \ 9 | --lr=0.001 \ 10 | --batch_size=1 \ 11 | --iter_size=5 \ 12 | --dataset=pascal_voc \ 13 | --input_size=224 \ 14 | --disp_interval=100 \ 15 | --num_classes=20 \ 16 | --num_workers=8 \ 17 | --snapshot_dir=./runs/${EXP}/model/ \ 18 | --accu_dir=./runs/${EXP}/accu_att/ \ 19 | --decay_points='10' 20 | -------------------------------------------------------------------------------- /train_iam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | EXP=exp2 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/train_iam.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --train_list=./data/voc12/train_cls.txt \ 7 | --test_list=./data/voc12/val_cls.txt \ 8 | --epoch=15 \ 9 | --lr=0.001 \ 10 | --batch_size=1 \ 11 | --iter_size=5 \ 12 | --dataset=pascal_voc \ 13 | --input_size=224 \ 14 | --disp_interval=100 \ 15 | --num_classes=20 \ 16 | --num_workers=8 \ 17 | --snapshot_dir=./runs/${EXP}/model/ \ 18 | --att_dir=./runs/exp1/accu_att/ \ 19 | --decay_points='10' 20 | -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/setup_cocostuff10k.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DATASET_DIR=$1 4 | 5 | # Download COCO-Stuff 10k (2GB) 6 | wget -nc -P $DATASET_DIR http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.1.zip 7 | 8 | unzip -n $DATASET_DIR/cocostuff-10k-v1.1.zip -d $DATASET_DIR 9 | 10 | echo =============================================================================================== 11 | echo "Set the path below to \"ROOT:\" in the config/cocostuff10k.yaml:" 12 | echo -e "\033[32m $DATASET_DIR \033[00m" 13 | echo =============================================================================================== -------------------------------------------------------------------------------- /train+.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | EXP=exp3 3 | 4 | CUDA_VISIBLE_DEVICES=0 python3 ./scripts/train.py \ 5 | --img_dir=./data/VOCdevkit/VOC2012/JPEGImages/ \ 6 | --train_list=./data/voc12/train_cls.txt \ 7 | --test_list=./data/voc12/val_cls.txt \ 8 | --epoch=15 \ 9 | --lr=0.001 \ 10 | --batch_size=1 \ 11 | --iter_size=5 \ 12 | --dataset=pascal_voc \ 13 | --input_size=224 \ 14 | --disp_interval=100 \ 15 | --num_classes=20 \ 16 | --num_workers=8 \ 17 | --snapshot_dir=./runs/${EXP}/model/ \ 18 | --att_dir=./runs/${EXP}/att/ \ 19 | --accu_dir=./runs/${EXP}/accu_att/ \ 20 | --decay_points='10' \ 21 | --drop_layer \ 22 | --drop_rate=0.5 \ 23 | --drop_threshold=0.6 \ 24 | -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/setup_caffemodels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download released caffemodels 4 | wget -nc -P ./data http://liangchiehchen.com/projects/released/deeplab_aspp_resnet101/prototxt_and_model.zip 5 | 6 | unzip -n ./data/prototxt_and_model.zip -d ./data 7 | 8 | # Move caffemodels to data directories 9 | ## MSCOCO 10 | mv ./data/init.caffemodel ./data/models/coco/deeplabv1_resnet101/caffemodel 11 | ## PASCAL VOC 2012 12 | mv ./data/train_iter_20000.caffemodel 
./data/models/voc12/deeplabv2_resnet101_msc/caffemodel 13 | mv ./data/train2_iter_20000.caffemodel ./data/models/voc12/deeplabv2_resnet101_msc/caffemodel 14 | 15 | echo =============================================================================================== 16 | echo "Next, try running script below:" 17 | echo -e "\033[32m python convert.py --dataset coco \033[00m" 18 | echo =============================================================================================== 19 | -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/setup_cocostuff164k.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DATASET_DIR=$1 4 | 5 | # Download COCO-Stuff 164k (20GB+) 6 | wget -nc -P $DATASET_DIR http://images.cocodataset.org/zips/train2017.zip 7 | wget -nc -P $DATASET_DIR http://images.cocodataset.org/zips/val2017.zip 8 | wget -nc -P $DATASET_DIR http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip 9 | 10 | mkdir -p $DATASET_DIR/images 11 | mkdir -p $DATASET_DIR/annotations 12 | unzip -n $DATASET_DIR/train2017.zip -d $DATASET_DIR/images/ 13 | unzip -n $DATASET_DIR/val2017.zip -d $DATASET_DIR/images/ 14 | unzip -n $DATASET_DIR/stuffthingmaps_trainval2017.zip -d $DATASET_DIR/annotations/ 15 | 16 | echo =============================================================================================== 17 | echo "Set the path below to \"ROOT:\" in the config/cocostuff164k.yaml:" 18 | echo -e "\033[32m $DATASET_DIR \033[00m" 19 | echo =============================================================================================== -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 09 January 2019 7 | 8 | 9 | from torch.optim.lr_scheduler import _LRScheduler 10 | 11 | 12 | class PolynomialLR(_LRScheduler): 13 | def __init__(self, optimizer, step_size, iter_max, power, last_epoch=-1): 14 | self.step_size = step_size 15 | self.iter_max = iter_max 16 | self.power = power 17 | super(PolynomialLR, self).__init__(optimizer, last_epoch) 18 | 19 | def polynomial_decay(self, lr): 20 | return lr * (1 - float(self.last_epoch) / self.iter_max) ** self.power 21 | 22 | def get_lr(self): 23 | if ( 24 | (self.last_epoch == 0) 25 | or (self.last_epoch % self.step_size != 0) 26 | or (self.last_epoch > self.iter_max) 27 | ): 28 | return [group["lr"] for group in self.optimizer.param_groups] 29 | return [self.polynomial_decay(lr) for lr in self.base_lrs] 30 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/coco.yaml: -------------------------------------------------------------------------------- 1 | EXP: 2 | ID: coco 3 | OUTPUT_DIR: data 4 | 5 | DATASET: 6 | NAME: coco 7 | ROOT: 8 | LABELS: ./data/datasets/coco/labels.txt 9 | N_CLASSES: 91 10 | IGNORE_LABEL: 11 | SCALES: 12 | SPLIT: 13 | TRAIN: 14 | VAL: 15 | TEST: 16 | 17 | DATALOADER: 18 | NUM_WORKERS: 0 19 | 20 | IMAGE: 21 | MEAN: 22 | R: 122.675 23 | G: 116.669 24 | B: 104.008 25 | SIZE: 26 | BASE: 27 | TRAIN: 28 | TEST: 513 29 | 30 | MODEL: 31 | NAME: DeepLabV1_ResNet101 32 | N_BLOCKS: [3, 4, 23, 3] 33 | ATROUS_RATES: 34 | INIT_MODEL: 35 | 36 | SOLVER: 37 | BATCH_SIZE: 38 | 
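# TRAIN/TEST are per-step batch sizes; ITER_SIZE below presumably accumulates
# gradients over that many steps (effective batch of TRAIN x ITER_SIZE). This is
# an assumption about the training loop in main.py, which is not shown here.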
TRAIN: 5 39 | TEST: 1 40 | ITER_MAX: 100000 41 | ITER_SIZE: 2 42 | ITER_SAVE: 5000 43 | ITER_TB: 100 44 | LR_DECAY: 10 45 | LR: 2.5e-4 46 | MOMENTUM: 0.9 47 | OPTIMIZER: sgd 48 | POLY_POWER: 0.9 49 | WEIGHT_DECAY: 5.0e-4 50 | AVERAGE_LOSS: 20 51 | 52 | CRF: 53 | ITER_MAX: 10 54 | POS_W: 3 55 | POS_XY_STD: 1 56 | BI_W: 4 57 | BI_XY_STD: 67 58 | BI_RGB_STD: 3 59 | -------------------------------------------------------------------------------- /deeplab-pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Kazuto Nakashima 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /deeplab-pytorch/scripts/train_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | 6 | # 0. Choose from {voc12, cocostuff10k, cocostuff164k} 7 | DATASET=voc12 8 | 9 | 10 | # 1. Train DeepLab v2 on ${DATASET} 11 | python main.py train \ 12 | -c configs/${DATASET}.yaml 13 | 14 | # Trained models are saved into 15 | # data/models/${DATASET}/deeplabv2_resnet101_msc/*/checkpoint_5000.pth 16 | # data/models/${DATASET}/deeplabv2_resnet101_msc/*/checkpoint_10000.pth 17 | # data/models/${DATASET}/deeplabv2_resnet101_msc/*/checkpoint_15000.pth 18 | # ... 19 | 20 | # Tensorboard logs are in data/logs. 21 | 22 | 23 | # 2. Evaluate the model on val set 24 | python main.py test \ 25 | -c configs/${DATASET}.yaml \ 26 | -m data/models/${DATASET}/deeplabv2_resnet101_msc/*/checkpoint_final.pth 27 | 28 | # Validation scores on 4 metrics are saved as 29 | # data/scores/${DATASET}/deeplabv2_resnet101_msc/*/scores.json 30 | 31 | # Logits are saved into 32 | # data/features/${DATASET}/deeplabv2_resnet101_msc/*/logit/... 33 | 34 | 35 | # 3. 
Re-evaluate the model with CRF post-processing 36 | python main.py crf \ 37 | -c configs/${DATASET}.yaml 38 | 39 | # Scores with CRF on 4 metrics are saved as 40 | # data/scores/${DATASET}/deeplabv2_resnet101_msc/*/scores_crf.json -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/crf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 09 January 2019 7 | 8 | 9 | import numpy as np 10 | import pydensecrf.densecrf as dcrf 11 | import pydensecrf.utils as utils 12 | 13 | 14 | class DenseCRF(object): 15 | def __init__(self, iter_max, pos_w, pos_xy_std, bi_w, bi_xy_std, bi_rgb_std): 16 | self.iter_max = iter_max 17 | self.pos_w = pos_w 18 | self.pos_xy_std = pos_xy_std 19 | self.bi_w = bi_w 20 | self.bi_xy_std = bi_xy_std 21 | self.bi_rgb_std = bi_rgb_std 22 | 23 | def __call__(self, image, probmap): 24 | C, H, W = probmap.shape 25 | 26 | U = utils.unary_from_softmax(probmap) 27 | U = np.ascontiguousarray(U) 28 | 29 | image = np.ascontiguousarray(image) 30 | 31 | d = dcrf.DenseCRF2D(W, H, C) 32 | d.setUnaryEnergy(U) 33 | d.addPairwiseGaussian(sxy=self.pos_xy_std, compat=self.pos_w) 34 | d.addPairwiseBilateral( 35 | sxy=self.bi_xy_std, srgb=self.bi_rgb_std, rgbim=image, compat=self.bi_w 36 | ) 37 | 38 | Q = d.inference(self.iter_max) 39 | Q = np.array(Q).reshape((C, H, W)) 40 | 41 | return Q 42 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/voc12.yaml: -------------------------------------------------------------------------------- 1 | EXP: 2 | ID: voc12 3 | OUTPUT_DIR: data 4 | 5 | DATASET: 6 | NAME: vocaug 7 | ROOT: ./../data/VOCdevkit/VOC2012/ 8 | LABELS: ./data/datasets/voc12/labels.txt 9 | N_CLASSES: 21 10 | IGNORE_LABEL: 255 11 | SCALES: [0.5, 0.75, 1.0, 1.25, 1.5] 12 | SPLIT: 13 | TRAIN: train_aug 14 | VAL: val 15 | TEST: test 16 | 17 | DATALOADER: 18 | NUM_WORKERS: 0 19 | 20 | IMAGE: 21 | MEAN: 22 | R: 122.675 23 | G: 116.669 24 | B: 104.008 25 | SIZE: 26 | BASE: # None 27 | TRAIN: 321 28 | TEST: 513 29 | 30 | MODEL: 31 | NAME: DeepLabV2_ResNet101_MSC 32 | N_BLOCKS: [3, 4, 23, 3] 33 | ATROUS_RATES: [6, 12, 18, 24] 34 | INIT_MODEL: ./data/models/coco/deeplabv1_resnet101/caffemodel/deeplabv1_resnet101-coco.pth 35 | 36 | SOLVER: 37 | BATCH_SIZE: 38 | TRAIN: 5 39 | TEST: 1 40 | ITER_MAX: 20000 41 | ITER_SIZE: 2 42 | ITER_SAVE: 5000 43 | ITER_TB: 100 44 | LR_DECAY: 10 45 | LR: 2.5e-4 46 | MOMENTUM: 0.9 47 | OPTIMIZER: sgd 48 | POLY_POWER: 0.9 49 | WEIGHT_DECAY: 5.0e-4 50 | AVERAGE_LOSS: 20 51 | 52 | CRF: 53 | ITER_MAX: 10 54 | POS_W: 3 55 | POS_XY_STD: 1 56 | BI_W: 4 57 | BI_XY_STD: 67 58 | BI_RGB_STD: 3 59 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/cocostuff10k.yaml: -------------------------------------------------------------------------------- 1 | EXP: 2 | ID: cocostuff10k 3 | OUTPUT_DIR: data 4 | 5 | DATASET: 6 | NAME: cocostuff10k 7 | ROOT: /media/kazuto1011/Extra/cocostuff/cocostuff-10k-v1.1 8 | LABELS: ./data/datasets/cocostuff/labels.txt 9 | N_CLASSES: 182 10 | IGNORE_LABEL: 255 11 | SCALES: [0.5, 0.75, 1.0, 1.25, 1.5] 12 | SPLIT: 13 | TRAIN: train 14 | VAL: test 15 | TEST: 16 | 17 | DATALOADER: 18 | NUM_WORKERS: 0 19 | 20 | IMAGE: 21 | MEAN: 22 | R: 122.675 23 | G: 116.669 24 | B: 104.008 25 | SIZE: 26 | BASE: 27 | TRAIN: 321 28 | TEST: 
513 29 | 30 | MODEL: 31 | NAME: DeepLabV2_ResNet101_MSC 32 | N_BLOCKS: [3, 4, 23, 3] 33 | ATROUS_RATES: [6, 12, 18, 24] 34 | INIT_MODEL: data/models/coco/deeplabv1_resnet101/caffemodel/deeplabv1_resnet101-coco.pth 35 | 36 | SOLVER: 37 | BATCH_SIZE: 38 | TRAIN: 5 39 | TEST: 5 40 | ITER_MAX: 20000 41 | ITER_SIZE: 2 42 | ITER_SAVE: 5000 43 | ITER_TB: 100 44 | LR_DECAY: 10 45 | LR: 2.5e-4 46 | MOMENTUM: 0.9 47 | OPTIMIZER: sgd 48 | POLY_POWER: 0.9 49 | WEIGHT_DECAY: 5.0e-4 50 | AVERAGE_LOSS: 20 51 | 52 | CRF: 53 | ITER_MAX: 10 54 | POS_W: 3 55 | POS_XY_STD: 1 56 | BI_W: 4 57 | BI_XY_STD: 67 58 | BI_RGB_STD: 3 59 | -------------------------------------------------------------------------------- /deeplab-pytorch/configs/cocostuff164k.yaml: -------------------------------------------------------------------------------- 1 | EXP: 2 | ID: cocostuff164k 3 | OUTPUT_DIR: data 4 | 5 | DATASET: 6 | NAME: cocostuff164k 7 | ROOT: /media/kazuto1011/Extra/cocostuff/cocostuff-164k 8 | LABELS: ./data/datasets/cocostuff/labels.txt 9 | N_CLASSES: 182 10 | IGNORE_LABEL: 255 11 | SCALES: [0.5, 0.75, 1.0, 1.25, 1.5] 12 | SPLIT: 13 | TRAIN: train2017 14 | VAL: val2017 15 | TEST: 16 | 17 | DATALOADER: 18 | NUM_WORKERS: 0 19 | 20 | IMAGE: 21 | MEAN: 22 | R: 122.675 23 | G: 116.669 24 | B: 104.008 25 | SIZE: 26 | BASE: # None 27 | TRAIN: 321 28 | TEST: 513 29 | 30 | MODEL: 31 | NAME: DeepLabV2_ResNet101_MSC 32 | N_BLOCKS: [3, 4, 23, 3] 33 | ATROUS_RATES: [6, 12, 18, 24] 34 | INIT_MODEL: data/models/coco/deeplabv1_resnet101/caffemodel/deeplabv1_resnet101-coco.pth 35 | 36 | SOLVER: 37 | BATCH_SIZE: 38 | TRAIN: 5 39 | TEST: 1 40 | ITER_MAX: 100000 41 | ITER_SIZE: 2 42 | ITER_SAVE: 5000 43 | ITER_TB: 100 44 | LR_DECAY: 10 45 | LR: 2.5e-4 46 | MOMENTUM: 0.9 47 | OPTIMIZER: sgd 48 | POLY_POWER: 0.9 49 | WEIGHT_DECAY: 5.0e-4 50 | AVERAGE_LOSS: 20 51 | 52 | CRF: 53 | ITER_MAX: 10 54 | POS_W: 3 55 | POS_XY_STD: 1 56 | BI_W: 4 57 | BI_XY_STD: 67 58 | BI_RGB_STD: 3 59 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/utils/metric.py: -------------------------------------------------------------------------------- 1 | # Originally written by wkentaro 2 | # https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/utils.py 3 | 4 | import numpy as np 5 | 6 | 7 | def _fast_hist(label_true, label_pred, n_class): 8 | mask = (label_true >= 0) & (label_true < n_class) 9 | hist = np.bincount( 10 | n_class * label_true[mask].astype(int) + label_pred[mask], 11 | minlength=n_class ** 2, 12 | ).reshape(n_class, n_class) 13 | return hist 14 | 15 | 16 | def scores(label_trues, label_preds, n_class): 17 | hist = np.zeros((n_class, n_class)) 18 | for lt, lp in zip(label_trues, label_preds): 19 | hist += _fast_hist(lt.flatten(), lp.flatten(), n_class) 20 | acc = np.diag(hist).sum() / hist.sum() 21 | acc_cls = np.diag(hist) / hist.sum(axis=1) 22 | acc_cls = np.nanmean(acc_cls) 23 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) 24 | valid = hist.sum(axis=1) > 0 # added 25 | mean_iu = np.nanmean(iu[valid]) 26 | freq = hist.sum(axis=1) / hist.sum() 27 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 28 | cls_iu = dict(zip(range(n_class), iu)) 29 | 30 | return { 31 | "Pixel Accuracy": acc, 32 | "Mean Accuracy": acc_cls, 33 | "Frequency Weighted IoU": fwavacc, 34 | "Mean IoU": mean_iu, 35 | "Class IoU": cls_iu, 36 | } 37 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/coco/labels.txt: 
-------------------------------------------------------------------------------- 1 | 0 background 2 | 1 person 3 | 2 bicycle 4 | 3 car 5 | 4 motorcycle 6 | 5 airplane 7 | 6 bus 8 | 7 train 9 | 8 truck 10 | 9 boat 11 | 10 traffic light 12 | 11 fire hydrant 13 | 12 street sign 14 | 13 stop sign 15 | 14 parking meter 16 | 15 bench 17 | 16 bird 18 | 17 cat 19 | 18 dog 20 | 19 horse 21 | 20 sheep 22 | 21 cow 23 | 22 elephant 24 | 23 bear 25 | 24 zebra 26 | 25 giraffe 27 | 26 hat 28 | 27 backpack 29 | 28 umbrella 30 | 29 shoe 31 | 30 eye glasses 32 | 31 handbag 33 | 32 tie 34 | 33 suitcase 35 | 34 frisbee 36 | 35 skis 37 | 36 snowboard 38 | 37 sports ball 39 | 38 kite 40 | 39 baseball bat 41 | 40 baseball glove 42 | 41 skateboard 43 | 42 surfboard 44 | 43 tennis racket 45 | 44 bottle 46 | 45 plate 47 | 46 wine glass 48 | 47 cup 49 | 48 fork 50 | 49 knife 51 | 50 spoon 52 | 51 bowl 53 | 52 banana 54 | 53 apple 55 | 54 sandwich 56 | 55 orange 57 | 56 broccoli 58 | 57 carrot 59 | 58 hot dog 60 | 59 pizza 61 | 60 donut 62 | 61 cake 63 | 62 chair 64 | 63 couch 65 | 64 potted plant 66 | 65 bed 67 | 66 mirror 68 | 67 dining table 69 | 68 window 70 | 69 desk 71 | 70 toilet 72 | 71 door 73 | 72 tv 74 | 73 laptop 75 | 74 mouse 76 | 75 remote 77 | 76 keyboard 78 | 77 cell phone 79 | 78 microwave 80 | 79 oven 81 | 80 toaster 82 | 81 sink 83 | 82 refrigerator 84 | 83 blender 85 | 84 book 86 | 85 clock 87 | 86 vase 88 | 87 scissors 89 | 88 teddy bear 90 | 89 hair drier 91 | 90 toothbrush -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/msc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2018-03-26 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class MSC(nn.Module): 14 | """ 15 | Multi-scale inputs 16 | """ 17 | 18 | def __init__(self, base, scales=None): 19 | super(MSC, self).__init__() 20 | self.base = base 21 | if scales: 22 | self.scales = scales 23 | else: 24 | self.scales = [0.5, 0.75] 25 | 26 | def forward(self, x): 27 | # Original 28 | logits = self.base(x) 29 | _, _, H, W = logits.shape 30 | interp = lambda l: F.interpolate( 31 | l, size=(H, W), mode="bilinear", align_corners=False 32 | ) 33 | 34 | # Scaled 35 | logits_pyramid = [] 36 | for p in self.scales: 37 | h = F.interpolate(x, scale_factor=p, mode="bilinear", align_corners=False) 38 | logits_pyramid.append(self.base(h)) 39 | 40 | # Pixel-wise max 41 | logits_all = [logits] + [interp(l) for l in logits_pyramid] 42 | logits_max = torch.max(torch.stack(logits_all), dim=0)[0] 43 | 44 | if self.training: 45 | return [logits] + logits_pyramid + [logits_max] 46 | else: 47 | return logits_max 48 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/deeplabv1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 19 February 2019 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from .resnet import _ResLayer, _Stem 15 | 16 | 17 | class DeepLabV1(nn.Sequential): 18 | """ 19 | DeepLab v1: Dilated ResNet + 1x1 Conv 20 | Note that this is just a 
container for loading the pretrained COCO model and not mentioned as "v1" in papers. 21 | """ 22 | 23 | def __init__(self, n_classes, n_blocks): 24 | super(DeepLabV1, self).__init__() 25 | ch = [64 * 2 ** p for p in range(6)] 26 | self.add_module("layer1", _Stem(ch[0])) 27 | self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], 1, 1)) 28 | self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], 2, 1)) 29 | self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], 1, 2)) 30 | self.add_module("layer5", _ResLayer(n_blocks[3], ch[4], ch[5], 1, 4)) 31 | self.add_module("fc", nn.Conv2d(2048, n_classes, 1)) 32 | 33 | 34 | if __name__ == "__main__": 35 | model = DeepLabV1(n_classes=21, n_blocks=[3, 4, 23, 3]) 36 | model.eval() 37 | image = torch.randn(1, 3, 513, 513) 38 | 39 | print(model) 40 | print("input:", image.shape) 41 | print("output:", model(image).shape) 42 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/cocostuff/README.md: -------------------------------------------------------------------------------- 1 | # COCO-Stuff 2 | 3 | This is an instruction for setting up COCO-Stuff dataset. 4 | COCO-Stuff 164k is the latest version and recommended. 5 | 6 | ![](../../../docs/datasets/cocostuff.png) 7 | 8 | ## COCO-Stuff 164k 9 | 10 | ### Setup 11 | 12 | 1. Run the script below to download the dataset (20GB+). 13 | 14 | ```sh 15 | $ bash ./scripts/setup_cocostuff164k.sh [PATH TO DOWNLOAD] 16 | ``` 17 | 18 | 2. Set the path to the dataset in ```configs/cocostuff164k.yaml```. 19 | 20 | ```yaml 21 | DATASET: cocostuff164k 22 | ROOT: # <- Write here 23 | ... 24 | ``` 25 | 26 | ### Dataset structure 27 | 28 | ``` 29 | ├── images 30 | │ ├── train2017 31 | │ │ ├── 000000000009.jpg 32 | │ │ └── ... 33 | │ └── val2017 34 | │ ├── 000000000139.jpg 35 | │ └── ... 36 | └── annotations 37 | ├── train2017 38 | │ ├── 000000000009.png 39 | │ └── ... 40 | └── val2017 41 | ├── 000000000139.png 42 | └── ... 43 | ``` 44 | 45 | ## COCO-Stuff 10k 46 | 47 | ### Setup 48 | 49 | 1. Run the script below to download the dataset (2GB). 50 | 51 | ```sh 52 | $ bash ./scripts/setup_cocostuff10k.sh [PATH TO DOWNLOAD] 53 | ``` 54 | 55 | 2. Set the path to the dataset in ```configs/cocostuff10k.yaml```. 56 | 57 | ```yaml 58 | DATASET: cocostuff10k 59 | ROOT: # <- Write here 60 | ... 61 | ``` 62 | 63 | ### Dataset structure 64 | 65 | ``` 66 | ├── images 67 | │ ├── COCO_train2014_000000000077.jpg 68 | │ └── ... 69 | ├── annotations 70 | │ ├── COCO_train2014_000000000077.mat 71 | │ └── ... 
72 | └── imageLists 73 | ├── all.txt 74 | ├── test.txt 75 | └── train.txt 76 | ``` 77 | -------------------------------------------------------------------------------- /deeplab-pytorch/hubconf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 20 December 2018 7 | 8 | from __future__ import print_function 9 | 10 | from torch.hub import load_state_dict_from_url 11 | 12 | model_url_root = "https://github.com/kazuto1011/deeplab-pytorch/releases/download/v1.0/" 13 | model_dict = { 14 | "cocostuff10k": ("deeplabv2_resnet101_msc-cocostuff10k-20000.pth", 182), 15 | "cocostuff164k": ("deeplabv2_resnet101_msc-cocostuff164k-100000.pth", 182), 16 | "voc12": ("deeplabv2_resnet101_msc-vocaug-20000.pth", 21), 17 | } 18 | 19 | 20 | def deeplabv2_resnet101(pretrained=None, n_classes=182, scales=None): 21 | 22 | from libs.models.deeplabv2 import DeepLabV2 23 | from libs.models.msc import MSC 24 | 25 | # Model parameters 26 | n_blocks = [3, 4, 23, 3] 27 | atrous_rates = [6, 12, 18, 24] 28 | if scales is None: 29 | scales = [0.5, 0.75] 30 | 31 | base = DeepLabV2(n_classes=n_classes, n_blocks=n_blocks, atrous_rates=atrous_rates) 32 | model = MSC(base=base, scales=scales) 33 | 34 | # Load pretrained models 35 | if isinstance(pretrained, str): 36 | 37 | assert pretrained in model_dict, list(model_dict.keys()) 38 | expected = model_dict[pretrained][1] 39 | error_message = "Expected: n_classes={}".format(expected) 40 | assert n_classes == expected, error_message 41 | 42 | model_url = model_url_root + model_dict[pretrained][0] 43 | state_dict = load_state_dict_from_url(model_url) 44 | model.load_state_dict(state_dict) 45 | 46 | return model 47 | 48 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/voc12/README.md: -------------------------------------------------------------------------------- 1 | # PASCAL VOC 2012 2 | 3 | This is an instruction for setting up PASCAL VOC dataset. 4 | 5 | ![](../../../docs/datasets/voc12.png) 6 | 7 | 1. Download PASCAL VOC 2012. 8 | 9 | ```sh 10 | $ bash scripts/setup_voc12.sh [PATH TO DOWNLOAD] 11 | ``` 12 | 13 | ``` 14 | /VOCdevkit 15 | └── VOC2012 16 | ├── Annotations 17 | ├── ImageSets 18 | │ └── Segmentation 19 | ├── JPEGImages 20 | ├── SegmentationObject 21 | └── SegmentationClass 22 | ``` 23 | 24 | 2. Add SBD augmentated training data as `SegmentationClassAug`. 25 | 26 | 27 | * Convert by yourself ([here](https://github.com/shelhamer/fcn.berkeleyvision.org/tree/master/data/pascal)). 28 | * Or download pre-converted files ([here](https://github.com/DrSleep/tensorflow-deeplab-resnet#evaluation)). 29 | 30 | 3. Download official image sets as `ImageSets/SegmentationAug`. 31 | 32 | * From https://ucla.app.box.com/s/rd9z2xvwsfpksi7mi08i2xqrj7ab4keb/file/55053033642 33 | * Or https://github.com/kazuto1011/deeplab-pytorch/files/2945588/list.zip 34 | 35 | ```sh 36 | /VOCdevkit 37 | └── VOC2012 38 | ├── Annotations 39 | ├── ImageSets 40 | │ ├── Segmentation 41 | │ └── SegmentationAug # ADDED!! 42 | │ ├── test.txt 43 | │ ├── train_aug.txt 44 | │ ├── train.txt 45 | │ ├── trainval_aug.txt 46 | │ ├── trainval.txt 47 | │ └── val.txt 48 | ├── JPEGImages 49 | ├── SegmentationObject 50 | ├── SegmentationClass 51 | └── SegmentationClassAug # ADDED!! 52 | └── 2007_000032.png 53 | ``` 54 | 55 | 1. Set the path to the dataset in ```configs/voc12.yaml```. 
56 | 57 | ```yaml 58 | DATASET: voc12 59 | ROOT: # <- Write here 60 | ... 61 | ``` 62 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/deeplabv2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-11-19 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from .resnet import _ConvBnReLU, _ResLayer, _Stem 15 | 16 | 17 | class _ASPP(nn.Module): 18 | """ 19 | Atrous spatial pyramid pooling (ASPP) 20 | """ 21 | 22 | def __init__(self, in_ch, out_ch, rates): 23 | super(_ASPP, self).__init__() 24 | for i, rate in enumerate(rates): 25 | self.add_module( 26 | "c{}".format(i), 27 | nn.Conv2d(in_ch, out_ch, 3, 1, padding=rate, dilation=rate, bias=True), 28 | ) 29 | 30 | for m in self.children(): 31 | nn.init.normal_(m.weight, mean=0, std=0.01) 32 | nn.init.constant_(m.bias, 0) 33 | 34 | def forward(self, x): 35 | return sum([stage(x) for stage in self.children()]) 36 | 37 | 38 | class DeepLabV2(nn.Sequential): 39 | """ 40 | DeepLab v2: Dilated ResNet + ASPP 41 | Output stride is fixed at 8 42 | """ 43 | 44 | def __init__(self, n_classes, n_blocks, atrous_rates): 45 | super(DeepLabV2, self).__init__() 46 | ch = [64 * 2 ** p for p in range(6)] 47 | self.add_module("layer1", _Stem(ch[0])) 48 | self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], 1, 1)) 49 | self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], 2, 1)) 50 | self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], 1, 2)) 51 | self.add_module("layer5", _ResLayer(n_blocks[3], ch[4], ch[5], 1, 4)) 52 | self.add_module("aspp", _ASPP(ch[5], n_classes, atrous_rates)) 53 | 54 | def freeze_bn(self): 55 | for m in self.modules(): 56 | if isinstance(m, _ConvBnReLU.BATCH_NORM): 57 | m.eval() 58 | 59 | 60 | if __name__ == "__main__": 61 | model = DeepLabV2( 62 | n_classes=21, n_blocks=[3, 4, 23, 3], atrous_rates=[6, 12, 18, 24] 63 | ) 64 | model.eval() 65 | image = torch.randn(1, 3, 513, 513) 66 | 67 | print(model) 68 | print("input:", image.shape) 69 | print("output:", model(image).shape) 70 | -------------------------------------------------------------------------------- /res.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import logging 4 | import os 5 | from os.path import exists 6 | import matplotlib as mpl 7 | import matplotlib.pyplot as plt 8 | 9 | colormaps = ['#000000', '#7F0000', '#007F00', '#7F7F00', '#00007F', '#7F007F', '#007F7F', '#7F7F7F', '#3F0000', '#BF0000', '#3F7F00', 10 | '#BF7F00', '#3F007F', '#BF007F', '#3F7F7F', '#BF7F7F', '#003F00', '#7F3F00', '#00BF00', '#7FBF00', '#003F7F'] 11 | 12 | def colormap(index): 13 | return mpl.colors.LinearSegmentedColormap.from_list('cmap', [colormaps[0], colormaps[index+1], '#FFFFFF'], 256) 14 | 15 | def load_dataset(test_lst): 16 | logging.info('Beginning loading dataset...') 17 | im_lst = [] 18 | label_lst = [] 19 | with open(test_lst) as f: 20 | test_names = f.readlines() 21 | lines = open(test_lst).read().splitlines() 22 | for line in lines: 23 | fields = line.split() 24 | im_name = fields[0] 25 | im_labels = [] 26 | for i in range(len(fields)-1): 27 | im_labels.append(int(fields[i+1])) 28 | im_lst.append(im_name) 29 | label_lst.append(im_labels) 30 | 
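# each line of train_cls.txt is "<image id> <class idx> <class idx> ...", so
# im_lst collects image ids and label_lst the per-image lists of class indices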
return im_lst, label_lst 31 | 32 | if __name__ == '__main__': 33 | 34 | train_lst = '/home/ubuntu/Project/datasets/VOCdevkit/VOC2012/ImageSets/Segmentation/train_cls.txt' 35 | root_folder = '/home/ubuntu/Project/datasets/VOCdevkit/VOC2012' 36 | im_lst, label_lst = load_dataset(train_lst) 37 | 38 | atten_path = './runs/exp3/accu_att' 39 | save_path = './runs/exp3/accu_att_zoom' 40 | if not exists(save_path): 41 | os.mkdir(save_path) 42 | for i in range(len(im_lst)): 43 | im_name = '{}/JPEGImages/{}.jpg'.format(root_folder, im_lst[i]) 44 | im_labels = label_lst[i] 45 | 46 | img = cv2.imread(im_name) 47 | height, width = img.shape[:2] 48 | im_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 49 | 50 | for label in im_labels: 51 | att_name = '{}/{}_{}.png'.format(atten_path, i, label) 52 | if not exists(att_name): 53 | continue 54 | att = cv2.imread(att_name, 0) 55 | 56 | 57 | att = cv2.resize(att, (width, height), interpolation=cv2.INTER_CUBIC) 58 | min_value = np.min(att) 59 | max_value = np.max(att) 60 | att = (att - min_value) / (max_value - min_value + 1e-8) 61 | att = np.array(att*255, dtype = np.uint8) 62 | 63 | #att = im_gray * 0.2 + att * 0.8 64 | save_name = '{}/{}_{}.png'.format(save_path, im_lst[i], label) 65 | #plt.imsave(save_name, att, cmap=plt.cm.jet) 66 | #plt.imsave(save_name, att, cmap=colormap(label)) 67 | cv2.imwrite(save_name, att) 68 | 69 | 70 | -------------------------------------------------------------------------------- /runs/exp1/res.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import logging 4 | import os 5 | from os.path import exists 6 | import matplotlib as mpl 7 | import matplotlib.pyplot as plt 8 | 9 | colormaps = ['#000000', '#7F0000', '#007F00', '#7F7F00', '#00007F', '#7F007F', '#007F7F', '#7F7F7F', '#3F0000', '#BF0000', '#3F7F00', 10 | '#BF7F00', '#3F007F', '#BF007F', '#3F7F7F', '#BF7F7F', '#003F00', '#7F3F00', '#00BF00', '#7FBF00', '#003F7F'] 11 | 12 | def colormap(index): 13 | return mpl.colors.LinearSegmentedColormap.from_list('cmap', [colormaps[0], colormaps[index+1], '#FFFFFF'], 256) 14 | 15 | def load_dataset(test_lst): 16 | logging.info('Beginning loading dataset...') 17 | im_lst = [] 18 | label_lst = [] 19 | with open(test_lst) as f: 20 | test_names = f.readlines() 21 | lines = open(test_lst).read().splitlines() 22 | for line in lines: 23 | fields = line.split() 24 | im_name = fields[0] 25 | im_labels = [] 26 | for i in range(len(fields)-1): 27 | im_labels.append(int(fields[i+1])) 28 | im_lst.append(im_name) 29 | label_lst.append(im_labels) 30 | return im_lst, label_lst 31 | 32 | if __name__ == '__main__': 33 | 34 | train_lst = '/home/miao/Projects/Classification/data/VOCdevkit/VOC2012/ImageSets/Segmentation/train_cls.txt' 35 | root_folder = '/home/miao/Projects/Classification/data/VOCdevkit/VOC2012' 36 | im_lst, label_lst = load_dataset(train_lst) 37 | 38 | atten_path = 'accu_att' 39 | save_path = 'accu_att_zoom' 40 | if not exists(save_path): 41 | os.mkdir(save_path) 42 | for i in range(len(im_lst)): 43 | im_name = '{}/JPEGImages/{}.jpg'.format(root_folder, im_lst[i]) 44 | im_labels = label_lst[i] 45 | 46 | img = cv2.imread(im_name) 47 | height, width = img.shape[:2] 48 | im_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 49 | 50 | for label in im_labels: 51 | att_name = '{}/{}_{}.png'.format(atten_path, i, label) 52 | if not exists(att_name): 53 | continue 54 | att = cv2.imread(att_name, 0) 55 | 56 | 57 | att = cv2.resize(att, (width, height), 
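# upsample the saved attention map back to the original image resolution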
interpolation=cv2.INTER_CUBIC) 58 | min_value = np.min(att) 59 | max_value = np.max(att) 60 | att = (att - min_value) / (max_value - min_value + 1e-8) 61 | att = np.array(att*255, dtype = np.uint8) 62 | 63 | att = im_gray * 0.2 + att * 0.8 64 | save_name = '{}/{}_{}.png'.format(save_path, im_lst[i], label) 65 | #plt.imsave(save_name, att, cmap=plt.cm.jet) 66 | plt.imsave(save_name, att, cmap=colormap(label)) 67 | #cv2.imwrite(save_name, att) 68 | 69 | 70 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .resnet import * 3 | from .deeplabv1 import * 4 | from .deeplabv2 import * 5 | from .deeplabv3 import * 6 | from .deeplabv3plus import * 7 | from .msc import * 8 | 9 | 10 | def init_weights(module): 11 | if isinstance(module, nn.Conv2d): 12 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 13 | if module.bias is not None: 14 | nn.init.constant_(module.bias, 0) 15 | elif isinstance(module, nn.Linear): 16 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 17 | if module.bias is not None: 18 | nn.init.constant_(module.bias, 0) 19 | elif isinstance(module, nn.BatchNorm2d): 20 | nn.init.constant_(module.weight, 1) 21 | if module.bias is not None: 22 | nn.init.constant_(module.bias, 0) 23 | 24 | 25 | def ResNet101(n_classes): 26 | return ResNet(n_classes=n_classes, n_blocks=[3, 4, 23, 3]) 27 | 28 | 29 | def DeepLabV1_ResNet101(n_classes): 30 | return DeepLabV1(n_classes=n_classes, n_blocks=[3, 4, 23, 3]) 31 | 32 | 33 | def DeepLabV2_ResNet101_MSC(n_classes): 34 | return MSC( 35 | base=DeepLabV2( 36 | n_classes=n_classes, n_blocks=[3, 4, 23, 3], atrous_rates=[6, 12, 18, 24] 37 | ), 38 | scales=[0.5, 0.75], 39 | ) 40 | 41 | 42 | def DeepLabV2S_ResNet101_MSC(n_classes): 43 | return MSC( 44 | base=DeepLabV2( 45 | n_classes=n_classes, n_blocks=[3, 4, 23, 3], atrous_rates=[3, 6, 9, 12] 46 | ), 47 | scales=[0.5, 0.75], 48 | ) 49 | 50 | 51 | def DeepLabV3_ResNet101_MSC(n_classes, output_stride=16): 52 | if output_stride == 16: 53 | atrous_rates = [6, 12, 18] 54 | elif output_stride == 8: 55 | atrous_rates = [12, 24, 36] 56 | else: 57 | NotImplementedError 58 | 59 | base = DeepLabV3( 60 | n_classes=n_classes, 61 | n_blocks=[3, 4, 23, 3], 62 | atrous_rates=atrous_rates, 63 | multi_grids=[1, 2, 4], 64 | output_stride=output_stride, 65 | ) 66 | 67 | for name, module in base.named_modules(): 68 | if ".bn" in name: 69 | module.momentum = 0.9997 70 | 71 | return MSC(base=base, scales=[0.5, 0.75]) 72 | 73 | 74 | def DeepLabV3Plus_ResNet101_MSC(n_classes, output_stride=16): 75 | if output_stride == 16: 76 | atrous_rates = [6, 12, 18] 77 | elif output_stride == 8: 78 | atrous_rates = [12, 24, 36] 79 | else: 80 | NotImplementedError 81 | 82 | base = DeepLabV3Plus( 83 | n_classes=n_classes, 84 | n_blocks=[3, 4, 23, 3], 85 | atrous_rates=atrous_rates, 86 | multi_grids=[1, 2, 4], 87 | output_stride=output_stride, 88 | ) 89 | 90 | for name, module in base.named_modules(): 91 | if ".bn" in name: 92 | module.momentum = 0.9997 93 | 94 | return MSC(base=base, scales=[0.5, 0.75]) 95 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/deeplabv3plus.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto 
Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2018-03-26 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | from collections import OrderedDict 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | from .deeplabv3 import _ASPP 17 | from .resnet import _ConvBnReLU, _ResLayer, _Stem 18 | 19 | 20 | class DeepLabV3Plus(nn.Module): 21 | """ 22 | DeepLab v3+: Dilated ResNet with multi-grid + improved ASPP + decoder 23 | """ 24 | 25 | def __init__(self, n_classes, n_blocks, atrous_rates, multi_grids, output_stride): 26 | super(DeepLabV3Plus, self).__init__() 27 | 28 | # Stride and dilation 29 | if output_stride == 8: 30 | s = [1, 2, 1, 1] 31 | d = [1, 1, 2, 4] 32 | elif output_stride == 16: 33 | s = [1, 2, 2, 1] 34 | d = [1, 1, 1, 2] 35 | 36 | # Encoder 37 | ch = [64 * 2 ** p for p in range(6)] 38 | self.layer1 = _Stem(ch[0]) 39 | self.layer2 = _ResLayer(n_blocks[0], ch[0], ch[2], s[0], d[0]) 40 | self.layer3 = _ResLayer(n_blocks[1], ch[2], ch[3], s[1], d[1]) 41 | self.layer4 = _ResLayer(n_blocks[2], ch[3], ch[4], s[2], d[2]) 42 | self.layer5 = _ResLayer(n_blocks[3], ch[4], ch[5], s[3], d[3], multi_grids) 43 | self.aspp = _ASPP(ch[5], 256, atrous_rates) 44 | concat_ch = 256 * (len(atrous_rates) + 2) 45 | self.add_module("fc1", _ConvBnReLU(concat_ch, 256, 1, 1, 0, 1)) 46 | 47 | # Decoder 48 | self.reduce = _ConvBnReLU(256, 48, 1, 1, 0, 1) 49 | self.fc2 = nn.Sequential( 50 | OrderedDict( 51 | [ 52 | ("conv1", _ConvBnReLU(304, 256, 3, 1, 1, 1)), 53 | ("conv2", _ConvBnReLU(256, 256, 3, 1, 1, 1)), 54 | ("conv3", nn.Conv2d(256, n_classes, kernel_size=1)), 55 | ] 56 | ) 57 | ) 58 | 59 | def forward(self, x): 60 | h = self.layer1(x) 61 | h = self.layer2(h) 62 | h_ = self.reduce(h) 63 | h = self.layer3(h) 64 | h = self.layer4(h) 65 | h = self.layer5(h) 66 | h = self.aspp(h) 67 | h = self.fc1(h) 68 | h = F.interpolate(h, size=h_.shape[2:], mode="bilinear", align_corners=False) 69 | h = torch.cat((h, h_), dim=1) 70 | h = self.fc2(h) 71 | h = F.interpolate(h, size=x.shape[2:], mode="bilinear", align_corners=False) 72 | return h 73 | 74 | 75 | if __name__ == "__main__": 76 | model = DeepLabV3Plus( 77 | n_classes=21, 78 | n_blocks=[3, 4, 23, 3], 79 | atrous_rates=[6, 12, 18], 80 | multi_grids=[1, 2, 4], 81 | output_stride=16, 82 | ) 83 | model.eval() 84 | image = torch.randn(1, 3, 513, 513) 85 | 86 | print(model) 87 | print("input:", image.shape) 88 | print("output:", model(image).shape) 89 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/cocostuff/labels.txt: -------------------------------------------------------------------------------- 1 | 0 person 2 | 1 bicycle 3 | 2 car 4 | 3 motorcycle 5 | 4 airplane 6 | 5 bus 7 | 6 train 8 | 7 truck 9 | 8 boat 10 | 9 traffic light 11 | 10 fire hydrant 12 | 11 street sign 13 | 12 stop sign 14 | 13 parking meter 15 | 14 bench 16 | 15 bird 17 | 16 cat 18 | 17 dog 19 | 18 horse 20 | 19 sheep 21 | 20 cow 22 | 21 elephant 23 | 22 bear 24 | 23 zebra 25 | 24 giraffe 26 | 25 hat 27 | 26 backpack 28 | 27 umbrella 29 | 28 shoe 30 | 29 eye glasses 31 | 30 handbag 32 | 31 tie 33 | 32 suitcase 34 | 33 frisbee 35 | 34 skis 36 | 35 snowboard 37 | 36 sports ball 38 | 37 kite 39 | 38 baseball bat 40 | 39 baseball glove 41 | 40 skateboard 42 | 41 surfboard 43 | 42 tennis racket 44 | 43 bottle 45 | 44 plate 46 | 45 wine glass 47 | 46 cup 48 | 47 fork 49 | 48 knife 50 | 49 spoon 51 | 50 bowl 52 | 51 banana 53 | 52 apple 54 | 53 sandwich 55 | 54 
orange 56 | 55 broccoli 57 | 56 carrot 58 | 57 hot dog 59 | 58 pizza 60 | 59 donut 61 | 60 cake 62 | 61 chair 63 | 62 couch 64 | 63 potted plant 65 | 64 bed 66 | 65 mirror 67 | 66 dining table 68 | 67 window 69 | 68 desk 70 | 69 toilet 71 | 70 door 72 | 71 tv 73 | 72 laptop 74 | 73 mouse 75 | 74 remote 76 | 75 keyboard 77 | 76 cell phone 78 | 77 microwave 79 | 78 oven 80 | 79 toaster 81 | 80 sink 82 | 81 refrigerator 83 | 82 blender 84 | 83 book 85 | 84 clock 86 | 85 vase 87 | 86 scissors 88 | 87 teddy bear 89 | 88 hair drier 90 | 89 toothbrush 91 | 90 hair brush 92 | 91 banner 93 | 92 blanket 94 | 93 branch 95 | 94 bridge 96 | 95 building-other 97 | 96 bush 98 | 97 cabinet 99 | 98 cage 100 | 99 cardboard 101 | 100 carpet 102 | 101 ceiling-other 103 | 102 ceiling-tile 104 | 103 cloth 105 | 104 clothes 106 | 105 clouds 107 | 106 counter 108 | 107 cupboard 109 | 108 curtain 110 | 109 desk-stuff 111 | 110 dirt 112 | 111 door-stuff 113 | 112 fence 114 | 113 floor-marble 115 | 114 floor-other 116 | 115 floor-stone 117 | 116 floor-tile 118 | 117 floor-wood 119 | 118 flower 120 | 119 fog 121 | 120 food-other 122 | 121 fruit 123 | 122 furniture-other 124 | 123 grass 125 | 124 gravel 126 | 125 ground-other 127 | 126 hill 128 | 127 house 129 | 128 leaves 130 | 129 light 131 | 130 mat 132 | 131 metal 133 | 132 mirror-stuff 134 | 133 moss 135 | 134 mountain 136 | 135 mud 137 | 136 napkin 138 | 137 net 139 | 138 paper 140 | 139 pavement 141 | 140 pillow 142 | 141 plant-other 143 | 142 plastic 144 | 143 platform 145 | 144 playingfield 146 | 145 railing 147 | 146 railroad 148 | 147 river 149 | 148 road 150 | 149 rock 151 | 150 roof 152 | 151 rug 153 | 152 salad 154 | 153 sand 155 | 154 sea 156 | 155 shelf 157 | 156 sky-other 158 | 157 skyscraper 159 | 158 snow 160 | 159 solid-other 161 | 160 stairs 162 | 161 stone 163 | 162 straw 164 | 163 structural-other 165 | 164 table 166 | 165 tent 167 | 166 textile-other 168 | 167 towel 169 | 168 tree 170 | 169 vegetable 171 | 170 wall-brick 172 | 171 wall-concrete 173 | 172 wall-other 174 | 173 wall-panel 175 | 174 wall-stone 176 | 175 wall-tile 177 | 176 wall-wood 178 | 177 water-other 179 | 178 waterdrops 180 | 179 window-blind 181 | 180 window-other 182 | 181 wood -------------------------------------------------------------------------------- /utils/Metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import numpy as np 4 | 5 | def accuracy(logits, target, topk=(1,)): 6 | ''' 7 | Compute the top k accuracy of classification results. 8 | :param target: the ground truth label 9 | :param topk: tuple or list of the expected k values. 10 | :return: A list of the accuracy values. 
The list has the same length as topk. 11 | ''' 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | scores = logits 15 | 16 | _, pred = scores.topk(maxk, 1, True, True) 17 | pred = pred.t() 18 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 19 | 20 | res = [] 21 | for k in topk: 22 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 23 | res.append(correct_k.mul_(100.0 / batch_size)) 24 | return res 25 | 26 | 27 | from sklearn import metrics 28 | def get_mAP(gt_labels, pred_scores): 29 | n_classes = np.shape(gt_labels)[1] 30 | results = [] 31 | for i in range(n_classes): 32 | res = metrics.average_precision_score(gt_labels[:,i], pred_scores[:,i]) 33 | results.append(res) 34 | 35 | results = ['%.3f' % x for x in results] 36 | cls_map = np.array(list(map(float, results))) 37 | return cls_map 38 | 39 | def get_AUC(gt_labels, pred_scores): 40 | res = metrics.roc_auc_score(gt_labels, pred_scores) 41 | return res 42 | 43 | def _to_numpy(v): 44 | v = torch.squeeze(v) 45 | if torch.is_tensor(v): 46 | v = v.cpu() 47 | v = v.numpy() 48 | elif isinstance(v, torch.autograd.Variable): 49 | v = v.cpu().data.numpy() 50 | 51 | return v 52 | 53 | def get_iou(pred, gt): 54 | ''' 55 | IoU averaged over images 56 | :param pred: 57 | :param gt: 58 | :return: 59 | ''' 60 | pred = _to_numpy(pred) 61 | gt = _to_numpy(gt) 62 | pred[gt==255] = 255 63 | 64 | assert pred.shape == gt.shape 65 | 66 | gt = gt.astype(np.float32) 67 | pred = pred.astype(np.float32) 68 | 69 | # max_label = int(args['--NoLabels']) - 1 # labels from 0,1, ... 20(for VOC) 70 | count = np.zeros((20 + 1,)) 71 | for j in range(20 + 1): 72 | x = np.where(pred == j) 73 | p_idx_j = set(zip(x[0].tolist(), x[1].tolist())) 74 | x = np.where(gt == j) 75 | GT_idx_j = set(zip(x[0].tolist(), x[1].tolist())) 76 | # pdb.set_trace() 77 | n_jj = set.intersection(p_idx_j, GT_idx_j) 78 | u_jj = set.union(p_idx_j, GT_idx_j) 79 | 80 | if len(GT_idx_j) != 0: 81 | count[j] = float(len(n_jj)) / float(len(u_jj)) 82 | 83 | result_class = count 84 | unique_classes = len(np.unique(gt))-1 if 255 in np.unique(gt).tolist() else len(np.unique(gt)) 85 | # unique_classes = len(np.unique(gt)) 86 | Aiou = np.sum(result_class[:]) / float(unique_classes) 87 | 88 | return Aiou 89 | 90 | def fast_hist(pred, gt, n=21): 91 | pred = _to_numpy(pred) 92 | gt = _to_numpy(gt) 93 | k = (gt >= 0) & (gt < n) 94 | return np.bincount(n * pred[k].astype(int) + gt[k], minlength=n**2).reshape(n, n) 95 | 96 | def get_voc_iou(hist): 97 | miou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) 98 | return miou 99 | 100 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/deeplabv3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2018-03-26 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | from collections import OrderedDict 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | from .resnet import _ConvBnReLU, _ResLayer, _Stem 17 | 18 | 19 | class _ImagePool(nn.Module): 20 | def __init__(self, in_ch, out_ch): 21 | super().__init__() 22 | self.pool = nn.AdaptiveAvgPool2d(1) 23 | self.conv = _ConvBnReLU(in_ch, out_ch, 1, 1, 0, 1) 24 | 25 | def forward(self, x): 26 | _, _, H, W = x.shape 27 | h = self.pool(x) 28 | h = self.conv(h) 29 | h = F.interpolate(h,
size=(H, W), mode="bilinear", align_corners=False) 30 | return h 31 | 32 | 33 | class _ASPP(nn.Module): 34 | """ 35 | Atrous spatial pyramid pooling with image-level feature 36 | """ 37 | 38 | def __init__(self, in_ch, out_ch, rates): 39 | super(_ASPP, self).__init__() 40 | self.stages = nn.Module() 41 | self.stages.add_module("c0", _ConvBnReLU(in_ch, out_ch, 1, 1, 0, 1)) 42 | for i, rate in enumerate(rates): 43 | self.stages.add_module( 44 | "c{}".format(i + 1), 45 | _ConvBnReLU(in_ch, out_ch, 3, 1, padding=rate, dilation=rate), 46 | ) 47 | self.stages.add_module("imagepool", _ImagePool(in_ch, out_ch)) 48 | 49 | def forward(self, x): 50 | return torch.cat([stage(x) for stage in self.stages.children()], dim=1) 51 | 52 | 53 | class DeepLabV3(nn.Sequential): 54 | """ 55 | DeepLab v3: Dilated ResNet with multi-grid + improved ASPP 56 | """ 57 | 58 | def __init__(self, n_classes, n_blocks, atrous_rates, multi_grids, output_stride): 59 | super(DeepLabV3, self).__init__() 60 | 61 | # Stride and dilation 62 | if output_stride == 8: 63 | s = [1, 2, 1, 1] 64 | d = [1, 1, 2, 4] 65 | elif output_stride == 16: 66 | s = [1, 2, 2, 1] 67 | d = [1, 1, 1, 2] 68 | 69 | ch = [64 * 2 ** p for p in range(6)] 70 | self.add_module("layer1", _Stem(ch[0])) 71 | self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], s[0], d[0])) 72 | self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], s[1], d[1])) 73 | self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], s[2], d[2])) 74 | self.add_module( 75 | "layer5", _ResLayer(n_blocks[3], ch[4], ch[5], s[3], d[3], multi_grids) 76 | ) 77 | self.add_module("aspp", _ASPP(ch[5], 256, atrous_rates)) 78 | concat_ch = 256 * (len(atrous_rates) + 2) 79 | self.add_module("fc1", _ConvBnReLU(concat_ch, 256, 1, 1, 0, 1)) 80 | self.add_module("fc2", nn.Conv2d(256, n_classes, kernel_size=1)) 81 | 82 | 83 | if __name__ == "__main__": 84 | model = DeepLabV3( 85 | n_classes=21, 86 | n_blocks=[3, 4, 23, 3], 87 | atrous_rates=[6, 12, 18], 88 | multi_grids=[1, 2, 4], 89 | output_stride=8, 90 | ) 91 | model.eval() 92 | image = torch.randn(1, 3, 513, 513) 93 | 94 | print(model) 95 | print("input:", image.shape) 96 | print("output:", model(image).shape) 97 | -------------------------------------------------------------------------------- /models/vgg1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | import torch.nn.functional as F 5 | import math 6 | import cv2 7 | import numpy as np 8 | import os 9 | 10 | model_urls = {'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth'} 11 | 12 | class VGG(nn.Module): 13 | 14 | def __init__(self, features, num_classes=20, init_weights=True): 15 | super(VGG, self).__init__() 16 | self.features = features 17 | self.extra_convs = nn.Sequential( 18 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 19 | nn.ReLU(True), 20 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 21 | nn.ReLU(True), 22 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 23 | nn.ReLU(True), 24 | nn.Conv2d(512,num_classes,1) 25 | ) 26 | self._initialize_weights() 27 | 28 | def forward(self, x): 29 | x = self.features(x) 30 | x = self.extra_convs(x) 31 | return x 32 | 33 | def _initialize_weights(self): 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 37 | #m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 38 | m.weight.data.normal_(0, 0.01) 39 | if m.bias is not None: 40 | m.bias.data.zero_() 41 | elif isinstance(m, nn.BatchNorm2d): 42 | m.weight.data.fill_(1) 43 | m.bias.data.zero_() 44 | elif isinstance(m, nn.Linear): 45 | m.weight.data.normal_(0, 0.01) 46 | m.bias.data.zero_() 47 | 48 | def get_parameter_groups(self): 49 | groups = ([], [], [], []) 50 | 51 | for name, value in self.named_parameters(): 52 | 53 | if 'extra' in name: 54 | if 'weight' in name: 55 | groups[2].append(value) 56 | else: 57 | groups[3].append(value) 58 | else: 59 | if 'weight' in name: 60 | groups[0].append(value) 61 | else: 62 | groups[1].append(value) 63 | print(groups[2]) 64 | return groups 65 | 66 | 67 | def make_layers(cfg, batch_norm=False): 68 | layers = [] 69 | in_channels = 3 70 | for i, v in enumerate(cfg): 71 | if v == 'M': 72 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 73 | elif v == 'N': 74 | layers += [nn.MaxPool2d(kernel_size=3, stride=1, padding=1)] 75 | else: 76 | if i > 13: 77 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, dilation=2, padding=2) 78 | else: 79 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 80 | if batch_norm: 81 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 82 | else: 83 | layers += [conv2d, nn.ReLU(inplace=True)] 84 | in_channels = v 85 | return nn.Sequential(*layers) 86 | 87 | 88 | cfg = { 89 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 90 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 91 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 92 | 'D1': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'N', 512, 512, 512], 93 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 94 | } 95 | 96 | 97 | def vgg16(pretrained=False, **kwargs): 98 | model = VGG(make_layers(cfg['D1']), **kwargs) 99 | if pretrained: 100 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16']), strict=False) 101 | return model 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OAA-PyTorch 2 | The official PyTorch code for ["Integral Object Mining via Online Attention Accumulation"](http://openaccess.thecvf.com/content_ICCV_2019/papers/Jiang_Integral_Object_Mining_via_Online_Attention_Accumulation_ICCV_2019_paper.pdf), implemented based on the code of [psa](https://github.com/jiwoon-ahn/psa) and [ACoL](https://github.com/xiaomengyc/ACoL). The segmentation framework is borrowed from [deeplab-pytorch](https://github.com/kazuto1011/deeplab-pytorch). 3 | 4 | ## Installation 5 | python3 6 | torch >= 1.0 7 | tqdm 8 | torchvision 9 | opencv-python 10 | 11 | Download the [VOCdevkit.tar.gz](https://drive.google.com/file/d/1jnHE6Sau0tHI7X6JQKhzHov-vseYbrf9/view?usp=sharing) file and extract it into the data/ folder. 12 | 13 | ## Online Attention Accumulation 14 | ``` 15 | cd OAA-PyTorch/ 16 | ./train.sh 17 | ``` 18 | After the training process, you can resize the accumulated attention maps to the original image size.
19 | ``` 20 | python res.py 21 | ``` 22 | To compare with the attention maps generated by the final classification model, you can generate those maps by 23 | ``` 24 | ./test.sh 25 | ``` 26 | 27 | ## Integral Attention Learning 28 | If you want to skip the online attention accumulation process and train the integral model directly, download the [pre-accumulated maps](https://drive.google.com/file/d/171hBXJu1Ty8eqiPtdqgZlR0D980WVBnr/view?usp=sharing) and extract them to `exp1/`. 29 | ``` 30 | ./train_iam.sh 31 | ./test_iam.sh 32 | ``` 33 | 34 | ## Attention Drop Layer 35 | ``` 36 | ./train+.sh 37 | ``` 38 | After the training process, you can resize the accumulated attention maps to the original image size. 39 | ``` 40 | python res.py 41 | ``` 42 | 43 | 44 | ## Weakly Supervised Segmentation 45 | To train a segmentation model, you first need to generate pseudo segmentation labels by 46 | ``` 47 | python gen_gt.py 48 | ``` 49 | This script will generate pseudo segmentation labels in './data/VOCdevkit/VOC2012/proxy-gt/'. 50 | Then you can train the [deeplab-pytorch](https://github.com/kazuto1011/deeplab-pytorch) model as follows: 51 | ``` 52 | cd deeplab-pytorch 53 | bash scripts/setup_caffemodels.sh 54 | python convert.py --dataset coco 55 | python convert.py --dataset voc12 56 | ``` 57 | Train the segmentation model by 58 | ``` 59 | python main.py train \ 60 | --config-path configs/voc12.yaml 61 | ``` 62 | Test the segmentation model by 63 | ``` 64 | python main.py test \ 65 | --config-path configs/voc12.yaml \ 66 | --model-path data/models/voc12/deeplabv2_resnet101_msc/train_aug/checkpoint_final.pth 67 | ``` 68 | Apply CRF post-processing by 69 | ``` 70 | python main.py crf \ 71 | --config-path configs/voc12.yaml 72 | ``` 73 | ## Performance 74 | Method | mIoU | mIoU (CRF) 75 | --- |:---:|:---: 76 | OAA | 65.7 | 66.9 77 | OAA+ | 66.6 | 67.8 78 | OAA-drop | 67.5 | 68.8 79 | 80 | If you have any questions about OAA, please feel free to contact [me](https://pengtaojiang.github.io/) (pt.jiang AT mail DOT nankai.edu.cn). 81 | 82 | ## Citation 83 | If you use this code or these models in your research, please cite: 84 | ``` 85 | @inproceedings{jiang2019integral, 86 | title={Integral Object Mining via Online Attention Accumulation}, 87 | author={Jiang, Peng-Tao and Hou, Qibin and Cao, Yang and Cheng, Ming-Ming and Wei, Yunchao and Xiong, Hong-Kai}, 88 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 89 | pages={2070--2079}, 90 | year={2019} 91 | } 92 | ``` 93 | ``` 94 | @article{jiang2021online, 95 | title={Online Attention Accumulation for Weakly Supervised Semantic Segmentation}, 96 | author={Jiang, Peng-Tao and Han, Ling-Hao and Hou, Qibin and Cheng, Ming-Ming and Wei, Yunchao}, 97 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 98 | year={2021}, 99 | publisher={IEEE} 100 | } 101 | ``` 102 | ## License 103 | The code is released under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License for non-commercial use only. Any commercial use should get formal permission first.
104 | 105 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-10-30 7 | 8 | import random 9 | 10 | import cv2 11 | import numpy as np 12 | import torch 13 | from PIL import Image 14 | from torch.utils import data 15 | 16 | 17 | class _BaseDataset(data.Dataset): 18 | """ 19 | Base dataset class 20 | """ 21 | 22 | def __init__( 23 | self, 24 | root, 25 | split, 26 | ignore_label, 27 | mean_bgr, 28 | augment=True, 29 | base_size=None, 30 | crop_size=321, 31 | scales=(1.0), 32 | flip=True, 33 | ): 34 | self.root = root 35 | self.split = split 36 | self.ignore_label = ignore_label 37 | self.mean_bgr = np.array(mean_bgr) 38 | self.augment = augment 39 | self.base_size = base_size 40 | self.crop_size = crop_size 41 | self.scales = scales 42 | self.flip = flip 43 | self.files = [] 44 | self._set_files() 45 | 46 | cv2.setNumThreads(0) 47 | 48 | def _set_files(self): 49 | """ 50 | Create a file path/image id list. 51 | """ 52 | raise NotImplementedError() 53 | 54 | def _load_data(self, image_id): 55 | """ 56 | Load the image and label in numpy.ndarray 57 | """ 58 | raise NotImplementedError() 59 | 60 | def _augmentation(self, image, label): 61 | # Scaling 62 | h, w = label.shape 63 | if self.base_size: 64 | if h > w: 65 | h, w = (self.base_size, int(self.base_size * w / h)) 66 | else: 67 | h, w = (int(self.base_size * h / w), self.base_size) 68 | scale_factor = random.choice(self.scales) 69 | h, w = (int(h * scale_factor), int(w * scale_factor)) 70 | image = cv2.resize(image, (w, h), interpolation=cv2.INTER_LINEAR) 71 | label = Image.fromarray(label).resize((w, h), resample=Image.NEAREST) 72 | label = np.asarray(label, dtype=np.int64) 73 | 74 | # Padding to fit for crop_size 75 | h, w = label.shape 76 | pad_h = max(self.crop_size - h, 0) 77 | pad_w = max(self.crop_size - w, 0) 78 | pad_kwargs = { 79 | "top": 0, 80 | "bottom": pad_h, 81 | "left": 0, 82 | "right": pad_w, 83 | "borderType": cv2.BORDER_CONSTANT, 84 | } 85 | if pad_h > 0 or pad_w > 0: 86 | image = cv2.copyMakeBorder(image, value=self.mean_bgr, **pad_kwargs) 87 | label = cv2.copyMakeBorder(label, value=self.ignore_label, **pad_kwargs) 88 | 89 | # Cropping 90 | h, w = label.shape 91 | start_h = random.randint(0, h - self.crop_size) 92 | start_w = random.randint(0, w - self.crop_size) 93 | end_h = start_h + self.crop_size 94 | end_w = start_w + self.crop_size 95 | image = image[start_h:end_h, start_w:end_w] 96 | label = label[start_h:end_h, start_w:end_w] 97 | 98 | if self.flip: 99 | # Random flipping 100 | if random.random() < 0.5: 101 | image = np.fliplr(image).copy() # HWC 102 | label = np.fliplr(label).copy() # HW 103 | return image, label 104 | 105 | def __getitem__(self, index): 106 | image_id, image, label = self._load_data(index) 107 | if self.augment: 108 | image, label = self._augmentation(image, label) 109 | # Mean subtraction 110 | image -= self.mean_bgr 111 | # HWC -> CHW 112 | image = image.transpose(2, 0, 1) 113 | return image_id, image.astype(np.float32), label.astype(np.int64) 114 | 115 | def __len__(self): 116 | return len(self.files) 117 | 118 | def __repr__(self): 119 | fmt_str = "Dataset: " + self.__class__.__name__ + "\n" 120 | fmt_str += " # data: {}\n".format(self.__len__()) 121 | fmt_str += " Split: {}\n".format(self.split) 122 
| fmt_str += " Root: {}".format(self.root) 123 | return fmt_str 124 | -------------------------------------------------------------------------------- /scripts/test_iam.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.getcwd()) 4 | 5 | import cv2 6 | import torch 7 | import numpy as np 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import argparse 11 | import torchvision 12 | from torchvision import models, transforms 13 | from torch.utils.data import DataLoader 14 | from utils.LoadData import test_data_loader 15 | from utils.Restore import restore 16 | import matplotlib.pyplot as plt 17 | from models import vgg1 18 | from tqdm import tqdm 19 | import matplotlib as mpl 20 | import matplotlib.pyplot as plt 21 | 22 | colormaps = ['#000000', '#7F0000', '#007F00', '#7F7F00', '#00007F', '#7F007F', '#007F7F', '#7F7F7F', '#3F0000', '#BF0000', '#3F7F00', 23 | '#BF7F00', '#3F007F', '#BF007F', '#3F7F7F', '#BF7F7F', '#003F00', '#7F3F00', '#00BF00', '#7FBF00', '#003F7F'] 24 | 25 | def colormap(index): 26 | return mpl.colors.LinearSegmentedColormap.from_list('cmap', [colormaps[0], colormaps[index+1], '#FFFFFF'], 256) 27 | 28 | def get_arguments(): 29 | parser = argparse.ArgumentParser(description='OAA') 30 | parser.add_argument("--root_dir", type=str, default='./') 31 | parser.add_argument("--save_dir", type=str, default='') 32 | parser.add_argument("--img_dir", type=str, default='') 33 | parser.add_argument("--test_list", type=str, default='') 34 | parser.add_argument("--batch_size", type=int, default=1) 35 | parser.add_argument("--input_size", type=int, default=256) 36 | parser.add_argument("--dataset", type=str, default='imagenet') 37 | parser.add_argument("--num_classes", type=int, default=20) 38 | parser.add_argument("--arch", type=str,default='vgg_v0') 39 | parser.add_argument("--restore_from", type=str, default='') 40 | parser.add_argument("--num_workers", type=int, default=20) 41 | 42 | return parser.parse_args() 43 | 44 | def get_model(args): 45 | model = vgg1.vgg16(num_classes=args.num_classes) 46 | model = torch.nn.DataParallel(model).cuda() 47 | 48 | pretrained_dict = torch.load(args.restore_from)['state_dict'] 49 | model_dict = model.state_dict() 50 | 51 | print(model_dict.keys()) 52 | print(pretrained_dict.keys()) 53 | 54 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} 55 | print("Weights cannot be loaded:") 56 | print([k for k in model_dict.keys() if k not in pretrained_dict.keys()]) 57 | 58 | model_dict.update(pretrained_dict) 59 | model.load_state_dict(model_dict) 60 | 61 | return model 62 | 63 | def validate(args): 64 | print('\nvalidating ... 
', flush=True, end='') 65 | 66 | model = get_model(args) 67 | model.eval() 68 | val_loader = test_data_loader(args) 69 | 70 | if not os.path.exists(args.save_dir): 71 | os.mkdir(args.save_dir) 72 | with torch.no_grad(): 73 | for idx, dat in tqdm(enumerate(val_loader)): 74 | img_name, img, label_in = dat 75 | label = label_in.cuda(non_blocking=True) 76 | logits = model(img) 77 | 78 | cv_im = cv2.imread(img_name[0]) 79 | cv_im_gray = cv2.cvtColor(cv_im, cv2.COLOR_BGR2GRAY) 80 | height, width = cv_im.shape[:2] 81 | 82 | for l, featmap in enumerate(logits): 83 | maps = featmap.cpu().data.numpy() 84 | im_name = args.save_dir + img_name[0].split('/')[-1][:-4] 85 | labels = label_in.long().numpy()[0] 86 | for i in range(int(args.num_classes)): 87 | if labels[i] == 1: 88 | att = maps[i] 89 | att[att < 0] = 0 90 | att = att / (np.max(att) + 1e-8) 91 | att = np.array(att * 255, dtype=np.uint8) 92 | out_name = im_name + '_{}.png'.format(i) 93 | att = cv2.resize(att, (width, height), interpolation=cv2.INTER_CUBIC) 94 | #att = cv_im_gray * 0.2 + att * 0.8 95 | cv2.imwrite(out_name, att) 96 | #plt.imsave(out_name, att, cmap=colormap(i)) 97 | 98 | if __name__ == '__main__': 99 | args = get_arguments() 100 | validate(args) 101 | -------------------------------------------------------------------------------- /scripts/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | import cv2 6 | import torch 7 | import numpy as np 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import argparse 11 | import torchvision 12 | from torchvision import models, transforms 13 | from torch.utils.data import DataLoader 14 | from utils.LoadData import test_data_loader 15 | from utils.Restore import restore 16 | import matplotlib.pyplot as plt 17 | from models import vgg 18 | from tqdm import tqdm 19 | import matplotlib as mpl 20 | import matplotlib.pyplot as plt 21 | 22 | colormaps = ['#000000', '#7F0000', '#007F00', '#7F7F00', '#00007F', '#7F007F', '#007F7F', '#7F7F7F', '#3F0000', '#BF0000', '#3F7F00', 23 | '#BF7F00', '#3F007F', '#BF007F', '#3F7F7F', '#BF7F7F', '#003F00', '#7F3F00', '#00BF00', '#7FBF00', '#003F7F'] 24 | 25 | def colormap(index): 26 | return mpl.colors.LinearSegmentedColormap.from_list('cmap', [colormaps[0], colormaps[index+1], '#FFFFFF'], 256) 27 | 28 | def get_arguments(): 29 | parser = argparse.ArgumentParser(description='ACoL') 30 | parser.add_argument("--root_dir", type=str, default='') 31 | parser.add_argument("--save_dir", type=str, default='') 32 | parser.add_argument("--img_dir", type=str, default='') 33 | parser.add_argument("--test_list", type=str, default='') 34 | parser.add_argument("--batch_size", type=int, default=1) 35 | parser.add_argument("--input_size", type=int, default=256) 36 | parser.add_argument("--dataset", type=str, default='voc2012') 37 | parser.add_argument("--num_classes", type=int, default=20) 38 | parser.add_argument("--num_workers", type=int, default=20) 39 | parser.add_argument("--arch", type=str,default='vgg_v0') 40 | parser.add_argument("--restore_from", type=str, default='') 41 | 42 | return parser.parse_args() 43 | 44 | def get_model(args): 45 | model = vgg.vgg16(num_classes=args.num_classes) 46 | model = torch.nn.DataParallel(model).cuda() 47 | 48 | pretrained_dict = torch.load(args.restore_from)['state_dict'] 49 | model_dict = model.state_dict() 50 | 51 | print(model_dict.keys()) 52 | print(pretrained_dict.keys()) 53 | 54 | pretrained_dict = {k: v 
for k, v in pretrained_dict.items() if k in model_dict.keys()} 55 | print("Weights cannot be loaded:") 56 | print([k for k in model_dict.keys() if k not in pretrained_dict.keys()]) 57 | 58 | model_dict.update(pretrained_dict) 59 | model.load_state_dict(model_dict) 60 | 61 | return model 62 | 63 | def validate(args): 64 | print('\nvalidating ... ', flush=True, end='') 65 | 66 | model = get_model(args) 67 | model.eval() 68 | val_loader = test_data_loader(args) 69 | 70 | if not os.path.exists(args.save_dir): 71 | os.mkdir(args.save_dir) 72 | 73 | with torch.no_grad(): 74 | for idx, dat in tqdm(enumerate(val_loader)): 75 | img_name, img, label_in = dat 76 | label = label_in.cuda(non_blocking=True) 77 | logits = model(img) 78 | last_featmaps = model.module.get_heatmaps() 79 | 80 | cv_im = cv2.imread(img_name[0]) 81 | cv_im_gray = cv2.cvtColor(cv_im, cv2.COLOR_BGR2GRAY) 82 | height, width = cv_im.shape[:2] 83 | 84 | for l, featmap in enumerate(last_featmaps): 85 | maps = featmap.cpu().data.numpy() 86 | im_name = args.save_dir + img_name[0].split('/')[-1][:-4] 87 | labels = label_in.long().numpy()[0] 88 | for i in range(int(args.num_classes)): 89 | if labels[i] == 1: 90 | att = maps[i] 91 | att[att < 0] = 0 92 | att = att / (np.max(att) + 1e-8) 93 | att = np.array(att * 255, dtype=np.uint8) 94 | out_name = im_name + '_{}.png'.format(i) 95 | att = cv2.resize(att, (width, height), interpolation=cv2.INTER_CUBIC) 96 | #att = cv_im_gray * 0.2 + att * 0.8 97 | cv2.imwrite(out_name, att) 98 | #plt.imsave(out_name, att, cmap=colormap(i)) 99 | 100 | if __name__ == '__main__': 101 | args = get_arguments() 102 | validate(args) 103 | -------------------------------------------------------------------------------- /gen_gt.py: -------------------------------------------------------------------------------- 1 | 2 | ####################################################################### 3 | # This file is provided by Peng-Tao Jiang. If you have any questions, # 4 | # please feel free to contact me (pt.jiang@mail.nankai.edu.cn). 
# 5 | ####################################################################### 6 | import cv2 7 | from PIL import Image 8 | import numpy as np 9 | import pydensecrf.densecrf as dcrf 10 | import multiprocessing 11 | import os 12 | from os.path import exists 13 | 14 | palette = [0,0,0, 128,0,0, 0,128,0, 128,128,0, 0,0,128, 128,0,128, 0,128,128, 128,128,128, 15 | 64,0,0, 192,0,0, 64,128,0, 192,128,0, 64,0,128, 192,0,128, 64,128,128, 192,128,128, 16 | 0,64,0, 128,64,0, 0,192,0, 128,192,0, 0,64,128, 128,64,128, 0,192,128, 128,192,128, 17 | 64,64,0, 192,64,0, 64,192,0, 192,192,0] 18 | 19 | cats = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 20 | 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'] 21 | 22 | # set path for data 23 | data_path = '/home/ubuntu/Project/datasets/VOCdevkit/VOC2012/' 24 | train_lst_path = data_path + 'ImageSets/Segmentation/train_cls.txt' 25 | im_path = data_path + 'JPEGImages/' 26 | sal_path = data_path + 'saliency_aug/' 27 | att_path = './runs/exp3/accu_att_zoom/' 28 | save_path = './data/VOCdevkit/VOC2012/proxy-gt/' 29 | 30 | if not exists(save_path): 31 | os.makedirs(save_path) 32 | 33 | with open(train_lst_path) as f: 34 | lines = f.readlines() 35 | 36 | # generate proxy ground-truth 37 | def gen_gt(index): 38 | line = lines[index] 39 | line = line[:-1] 40 | fields = line.split() 41 | name = fields[0] 42 | im_name = im_path + name + '.jpg' 43 | bg_name = sal_path + name + '.png' 44 | #print(bg_name) 45 | if not os.path.exists(bg_name): 46 | return 47 | 48 | sal = cv2.imread(bg_name, 0) 49 | height, width = sal.shape 50 | gt = np.zeros((21, height, width), dtype=np.float32) 51 | sal = np.array(sal, dtype=np.float32) 52 | 53 | # some thresholds. 54 | conflict = 0.9 55 | fg_thr = 0.3 56 | # the below two values are used for generating uncertainty pixels 57 | bg_thr = 32 58 | att_thr = 0.8 59 | 60 | # use saliency map to provide background cues 61 | gt[0] = (1 - (sal / 255)) 62 | init_gt = np.zeros((height, width), dtype=float) 63 | sal_att = sal.copy() 64 | 65 | for i in range(len(fields) - 1): 66 | k = i + 1 67 | cls = int(fields[k]) 68 | att_name = att_path + name + '_' + str(cls) + '.png' 69 | if not exists(att_name): 70 | continue 71 | 72 | # normalize attention to [0, 1] 73 | att = cv2.imread(att_name, 0) 74 | att = (att - np.min(att)) / (np.max(att) - np.min(att) + 1e-8) 75 | gt[cls+1] = att.copy() 76 | sal_att = np.maximum(sal_att, (att > att_thr) *255) 77 | 78 | 79 | # throw low confidence values for all classes 80 | gt[gt < fg_thr] = 0 81 | 82 | # conflict pixels with multiple confidence values 83 | bg = np.array(gt > conflict, dtype=np.uint8) 84 | bg = np.sum(bg, axis=0) 85 | gt = gt.argmax(0).astype(np.uint8) 86 | gt[bg > 1] = 255 87 | 88 | # pixels regarded as background but confidence saliency values 89 | bg = np.array(sal_att >= bg_thr, dtype=np.uint8) * np.array(gt == 0, dtype=np.uint8) 90 | gt[bg > 0] = 255 91 | 92 | # this is an engineering idea, for an image with a small ratio of semantic objects, 93 | # we ignore the whole image, I find that this operation have little impact on 94 | out = gt.copy() 95 | valid = np.array((out > 0) & (out < 255), dtype=int).sum() 96 | ratio = float(valid) / float(height * width) 97 | if ratio < 0.01: 98 | out[...] 
= 255 99 | 100 | # output the proxy labels using the VOC12 label format 101 | out = Image.fromarray(out.astype(np.uint8), mode='P') 102 | #out.putpalette(palette) 103 | out_name = save_path + name + '.png' 104 | out.save(out_name) 105 | 106 | ### Parallel Mode 107 | pool = multiprocessing.Pool(processes=16) 108 | pool.map(gen_gt, range(len(lines))) 109 | # pool.map(gen_gt, range(50)) 110 | pool.close() 111 | pool.join() 112 | 113 | # Loop Mode 114 | #for i in range(len(lines)): 115 | # gen_gt(i) 116 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/models/resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-11-19 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | from collections import OrderedDict 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | try: 17 | from encoding.nn import SyncBatchNorm 18 | 19 | _BATCH_NORM = SyncBatchNorm 20 | except: 21 | _BATCH_NORM = nn.BatchNorm2d 22 | 23 | _BOTTLENECK_EXPANSION = 4 24 | 25 | 26 | class _ConvBnReLU(nn.Sequential): 27 | """ 28 | Cascade of 2D convolution, batch norm, and ReLU. 29 | """ 30 | 31 | BATCH_NORM = _BATCH_NORM 32 | 33 | def __init__( 34 | self, in_ch, out_ch, kernel_size, stride, padding, dilation, relu=True 35 | ): 36 | super(_ConvBnReLU, self).__init__() 37 | self.add_module( 38 | "conv", 39 | nn.Conv2d( 40 | in_ch, out_ch, kernel_size, stride, padding, dilation, bias=False 41 | ), 42 | ) 43 | self.add_module("bn", _BATCH_NORM(out_ch, eps=1e-5, momentum=1 - 0.999)) 44 | 45 | if relu: 46 | self.add_module("relu", nn.ReLU()) 47 | 48 | 49 | class _Bottleneck(nn.Module): 50 | """ 51 | Bottleneck block of MSRA ResNet. 52 | """ 53 | 54 | def __init__(self, in_ch, out_ch, stride, dilation, downsample): 55 | super(_Bottleneck, self).__init__() 56 | mid_ch = out_ch // _BOTTLENECK_EXPANSION 57 | self.reduce = _ConvBnReLU(in_ch, mid_ch, 1, stride, 0, 1, True) 58 | self.conv3x3 = _ConvBnReLU(mid_ch, mid_ch, 3, 1, dilation, dilation, True) 59 | self.increase = _ConvBnReLU(mid_ch, out_ch, 1, 1, 0, 1, False) 60 | self.shortcut = ( 61 | _ConvBnReLU(in_ch, out_ch, 1, stride, 0, 1, False) 62 | if downsample 63 | else nn.Identity() 64 | ) 65 | 66 | def forward(self, x): 67 | h = self.reduce(x) 68 | h = self.conv3x3(h) 69 | h = self.increase(h) 70 | h += self.shortcut(x) 71 | return F.relu(h) 72 | 73 | 74 | class _ResLayer(nn.Sequential): 75 | """ 76 | Residual layer with multi grids 77 | """ 78 | 79 | def __init__(self, n_layers, in_ch, out_ch, stride, dilation, multi_grids=None): 80 | super(_ResLayer, self).__init__() 81 | 82 | if multi_grids is None: 83 | multi_grids = [1 for _ in range(n_layers)] 84 | else: 85 | assert n_layers == len(multi_grids) 86 | 87 | # Downsampling is only in the first block 88 | for i in range(n_layers): 89 | self.add_module( 90 | "block{}".format(i + 1), 91 | _Bottleneck( 92 | in_ch=(in_ch if i == 0 else out_ch), 93 | out_ch=out_ch, 94 | stride=(stride if i == 0 else 1), 95 | dilation=dilation * multi_grids[i], 96 | downsample=(True if i == 0 else False), 97 | ), 98 | ) 99 | 100 | 101 | class _Stem(nn.Sequential): 102 | """ 103 | The 1st conv layer. 104 | Note that the max pooling is different from both MSRA and FAIR ResNet. 
105 | """ 106 | 107 | def __init__(self, out_ch): 108 | super(_Stem, self).__init__() 109 | self.add_module("conv1", _ConvBnReLU(3, out_ch, 7, 2, 3, 1)) 110 | self.add_module("pool", nn.MaxPool2d(3, 2, 1, ceil_mode=True)) 111 | 112 | 113 | class ResNet(nn.Sequential): 114 | def __init__(self, n_classes, n_blocks): 115 | super(ResNet, self).__init__() 116 | ch = [64 * 2 ** p for p in range(6)] 117 | self.add_module("layer1", _Stem(ch[0])) 118 | self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], 1, 1)) 119 | self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], 2, 1)) 120 | self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], 2, 1)) 121 | self.add_module("layer5", _ResLayer(n_blocks[3], ch[4], ch[5], 2, 1)) 122 | self.add_module("pool5", nn.AdaptiveAvgPool2d(1)) 123 | self.add_module("flatten", nn.Flatten()) 124 | self.add_module("fc", nn.Linear(ch[5], n_classes)) 125 | 126 | 127 | if __name__ == "__main__": 128 | model = ResNet(n_classes=1000, n_blocks=[3, 4, 23, 3]) 129 | model.eval() 130 | image = torch.randn(1, 3, 224, 224) 131 | 132 | print(model) 133 | print("input:", image.shape) 134 | print("output:", model(image).shape) 135 | -------------------------------------------------------------------------------- /scripts/my_optim.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | from torch.optim.lr_scheduler import LambdaLR 3 | import numpy as np 4 | 5 | #def get_finetune_optimizer(args, model): 6 | # lr = args.lr 7 | # weight_list = [] 8 | # bias_list = [] 9 | # last_weight_list = [] 10 | # last_bias_list =[] 11 | # for name, value in model.named_parameters(): 12 | # # if 'features' in name: 13 | # # value.requires_grad = False 14 | # if 'cls' in name: 15 | # if 'weight' in name: 16 | # last_weight_list.append(value) 17 | # elif 'bias' in name: 18 | # last_bias_list.append(value) 19 | # else: 20 | # if 'weight' in name: 21 | # weight_list.append(value) 22 | # elif 'bias' in name: 23 | # bias_list.append(value) 24 | # 25 | # opt = optim.SGD([{'params': weight_list, 'lr':lr}, 26 | # {'params':bias_list, 'lr':lr*2}, 27 | # {'params':last_weight_list, 'lr':lr*10}, 28 | # {'params': last_bias_list, 'lr':lr*20}], momentum=0.9, weight_decay=args.weight_decay, nesterov=True) 29 | # 30 | # return opt 31 | 32 | class PolyOptimizer(optim.SGD): 33 | 34 | def __init__(self, params, lr, weight_decay, max_step, momentum=0.9): 35 | super().__init__(params, lr, weight_decay) 36 | self.param_groups = params 37 | self.global_step = 0 38 | self.max_step = max_step 39 | self.momentum = momentum 40 | 41 | self.__initial_lr = [group['lr'] for group in self.param_groups] 42 | 43 | 44 | def step(self, closure=None): 45 | 46 | if self.global_step < self.max_step: 47 | lr_mult = (1 - self.global_step / self.max_step) ** self.momentum 48 | 49 | for i in range(len(self.param_groups)): 50 | self.param_groups[i]['lr'] = self.__initial_lr[i] * lr_mult 51 | super().step(closure) 52 | 53 | self.global_step += 1 54 | 55 | 56 | def lr_poly(base_lr, iter,max_iter,power=0.9): 57 | return base_lr*((1-float(iter)/max_iter)**(power)) 58 | 59 | def reduce_lr_poly(args, optimizer, global_iter, max_iter): 60 | base_lr = args.lr 61 | for g in optimizer.param_groups: 62 | g['lr'] = lr_poly(base_lr=base_lr, iter=global_iter, max_iter=max_iter, power=0.9) 63 | 64 | def get_optimizer(args, model): 65 | lr = args.lr 66 | # opt = optim.SGD(params=model.parameters(), lr=lr, momentum=0.9, weight_decay=0.0001) 67 | opt = 
optim.SGD(params=[para for name, para in model.named_parameters() if 'features' not in name], lr=lr, momentum=0.9, weight_decay=0.0005) 68 | # lambda1 = lambda epoch: 0.1 if epoch in [85, 125, 165] else 1.0 69 | # scheduler = LambdaLR(opt, lr_lambda=lambda1) 70 | 71 | return opt 72 | 73 | def get_adam(args, model): 74 | lr = args.lr 75 | opt = optim.Adam(params=model.parameters(), lr =lr, weight_decay=0.0005) 76 | # opt = optim.Adam(params=model.parameters(), lr =lr) 77 | 78 | return opt 79 | 80 | def reduce_lr(args, optimizer, epoch, factor=0.1): 81 | # if 'coco' in args.dataset: 82 | # change_points = [1,2,3,4,5] 83 | # elif 'imagenet' in args.dataset: 84 | # change_points = [1,2,3,4,5,6,7,8,9,10,11,12] 85 | # else: 86 | # change_points = None 87 | 88 | values = args.decay_points.strip().split(',') 89 | try: 90 | change_points = map(lambda x: int(x.strip()), values) 91 | except ValueError: 92 | change_points = None 93 | 94 | if change_points is not None and epoch in change_points: 95 | for g in optimizer.param_groups: 96 | g['lr'] = g['lr']*factor 97 | print(epoch, g['lr']) 98 | return True 99 | 100 | def adjust_lr(args, optimizer, epoch): 101 | if 'cifar' in args.dataset: 102 | change_points = [80, 120, 160] 103 | elif 'indoor' in args.dataset: 104 | change_points = [60, 80, 100] 105 | elif 'dog' in args.dataset: 106 | change_points = [60, 80, 100] 107 | elif 'voc' in args.dataset: 108 | change_points = [30, 40] 109 | else: 110 | change_points = None 111 | # else: 112 | 113 | # if epoch in change_points: 114 | # lr = args.lr * 0.1**(change_points.index(epoch)+1) 115 | # else: 116 | # lr = args.lr 117 | 118 | if change_points is not None: 119 | change_points = np.array(change_points) 120 | pos = np.sum(epoch > change_points) 121 | lr = args.lr * (0.1**pos) 122 | else: 123 | lr = args.lr 124 | 125 | for param_group in optimizer.param_groups: 126 | param_group['lr'] = lr 127 | -------------------------------------------------------------------------------- /utils/pyutils.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import time 4 | import sys 5 | 6 | class Logger(object): 7 | def __init__(self, outfile): 8 | self.terminal = sys.stdout 9 | self.log = open(outfile, "w") 10 | sys.stdout = self 11 | 12 | def write(self, message): 13 | self.terminal.write(message) 14 | self.log.write(message) 15 | 16 | def flush(self): 17 | self.terminal.flush() 18 | 19 | 20 | class AverageMeter: 21 | def __init__(self, *keys): 22 | self.__data = dict() 23 | for k in keys: 24 | self.__data[k] = [0.0, 0] 25 | 26 | def add(self, dict): 27 | for k, v in dict.items(): 28 | self.__data[k][0] += v 29 | self.__data[k][1] += 1 30 | 31 | def get(self, *keys): 32 | if len(keys) == 1: 33 | return self.__data[keys[0]][0] / self.__data[keys[0]][1] 34 | else: 35 | v_list = [self.__data[k][0] / self.__data[k][1] for k in keys] 36 | return tuple(v_list) 37 | 38 | def pop(self, key=None): 39 | if key is None: 40 | for k in self.__data.keys(): 41 | self.__data[k] = [0.0, 0] 42 | else: 43 | v = self.get(key) 44 | self.__data[key] = [0.0, 0] 45 | return v 46 | 47 | 48 | class Timer: 49 | def __init__(self, starting_msg = None): 50 | self.start = time.time() 51 | self.stage_start = self.start 52 | 53 | if starting_msg is not None: 54 | print(starting_msg, time.ctime(time.time())) 55 | 56 | 57 | def update_progress(self, progress): 58 | self.elapsed = time.time() - self.start 59 | self.est_total = self.elapsed / progress 60 | self.est_remaining = self.est_total - 
self.elapsed 61 | self.est_finish = int(self.start + self.est_total) 62 | 63 | 64 | def str_est_finish(self): 65 | return str(time.ctime(self.est_finish)) 66 | 67 | def get_stage_elapsed(self): 68 | return time.time() - self.stage_start 69 | 70 | def reset_stage(self): 71 | self.stage_start = time.time() 72 | 73 | 74 | from multiprocessing.pool import ThreadPool 75 | 76 | class BatchThreader: 77 | 78 | def __init__(self, func, args_list, batch_size, prefetch_size=4, processes=12): 79 | self.batch_size = batch_size 80 | self.prefetch_size = prefetch_size 81 | 82 | self.pool = ThreadPool(processes=processes) 83 | self.async_result = [] 84 | 85 | self.func = func 86 | self.left_args_list = args_list 87 | self.n_tasks = len(args_list) 88 | 89 | # initial work 90 | self.__start_works(self.__get_n_pending_works()) 91 | 92 | 93 | def __start_works(self, times): 94 | for _ in range(times): 95 | args = self.left_args_list.pop(0) 96 | self.async_result.append( 97 | self.pool.apply_async(self.func, args)) 98 | 99 | 100 | def __get_n_pending_works(self): 101 | return min((self.prefetch_size + 1) * self.batch_size - len(self.async_result) 102 | , len(self.left_args_list)) 103 | 104 | 105 | 106 | def pop_results(self): 107 | 108 | n_inwork = len(self.async_result) 109 | 110 | n_fetch = min(n_inwork, self.batch_size) 111 | rtn = [self.async_result.pop(0).get() 112 | for _ in range(n_fetch)] 113 | 114 | to_fill = self.__get_n_pending_works() 115 | if to_fill == 0: 116 | self.pool.close() 117 | else: 118 | self.__start_works(to_fill) 119 | 120 | return rtn 121 | 122 | 123 | 124 | 125 | def get_indices_of_pairs(radius, size): 126 | 127 | search_dist = [] 128 | 129 | for x in range(1, radius): 130 | search_dist.append((0, x)) 131 | 132 | for y in range(1, radius): 133 | for x in range(-radius + 1, radius): 134 | if x * x + y * y < radius * radius: 135 | search_dist.append((y, x)) 136 | 137 | radius_floor = radius - 1 138 | 139 | full_indices = np.reshape(np.arange(0, size[0]*size[1], dtype=np.int64), 140 | (size[0], size[1])) 141 | 142 | cropped_height = size[0] - radius_floor 143 | cropped_width = size[1] - 2 * radius_floor 144 | 145 | indices_from = np.reshape(full_indices[:-radius_floor, radius_floor:-radius_floor], 146 | [-1]) 147 | 148 | indices_to_list = [] 149 | 150 | for dy, dx in search_dist: 151 | indices_to = full_indices[dy:dy + cropped_height, 152 | radius_floor + dx:radius_floor + dx + cropped_width] 153 | indices_to = np.reshape(indices_to, [-1]) 154 | 155 | indices_to_list.append(indices_to) 156 | 157 | concat_indices_to = np.concatenate(indices_to_list, axis=0) 158 | 159 | return indices_from, concat_indices_to 160 | 161 | -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/voc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 08 February 2019 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | import os.path as osp 11 | 12 | import cv2 13 | import numpy as np 14 | import torch 15 | from PIL import Image 16 | from torch.utils import data 17 | 18 | from .base import _BaseDataset 19 | 20 | 21 | class VOC(_BaseDataset): 22 | """ 23 | PASCAL VOC Segmentation dataset 24 | """ 25 | 26 | def __init__(self, year=2012, **kwargs): 27 | self.year = year 28 | super(VOC, self).__init__(**kwargs) 29 | 30 | def _set_files(self): 31 | self.root = 
osp.join(self.root, "VOC{}".format(self.year)) 32 | self.image_dir = osp.join(self.root, "JPEGImages") 33 | self.label_dir = osp.join(self.root, "SegmentationClass") 34 | 35 | if self.split in ["train", "trainval", "val", "test"]: 36 | file_list = osp.join( 37 | self.root, "ImageSets/Segmentation", self.split + ".txt" 38 | ) 39 | file_list = tuple(open(file_list, "r")) 40 | file_list = [id_.rstrip() for id_ in file_list] 41 | self.files = file_list 42 | else: 43 | raise ValueError("Invalid split name: {}".format(self.split)) 44 | 45 | def _load_data(self, index): 46 | # Set paths 47 | image_id = self.files[index] 48 | image_path = osp.join(self.image_dir, image_id + ".jpg") 49 | label_path = osp.join(self.label_dir, image_id + ".png") 50 | # Load an image 51 | image = cv2.imread(image_path, cv2.IMREAD_COLOR).astype(np.float32) 52 | label = np.asarray(Image.open(label_path), dtype=np.int32) 53 | return image_id, image, label 54 | 55 | 56 | class VOCAug(_BaseDataset): 57 | """ 58 | PASCAL VOC Segmentation dataset with extra annotations 59 | """ 60 | 61 | def __init__(self, year=2012, **kwargs): 62 | self.year = year 63 | super(VOCAug, self).__init__(**kwargs) 64 | 65 | def _set_files(self): 66 | #self.root = osp.join(self.root, "VOC{}".format(self.year)) 67 | 68 | if self.split in ["train", "train_aug", "val"]: 69 | file_list = osp.join( 70 | "./data/datasets/voc12/", self.split + ".txt" 71 | ) 72 | file_list = tuple(open(file_list, "r")) 73 | file_list = [id_.rstrip().split(" ") for id_ in file_list] 74 | self.files, self.labels = list(zip(*file_list)) 75 | else: 76 | raise ValueError("Invalid split name: {}".format(self.split)) 77 | 78 | def _load_data(self, index): 79 | # Set paths 80 | image_id = self.files[index].split("/")[-1].split(".")[0] 81 | image_path = osp.join(self.root, self.files[index][1:]) 82 | label_path = osp.join(self.root, self.labels[index][1:]) 83 | # Load an image 84 | image = cv2.imread(image_path, cv2.IMREAD_COLOR).astype(np.float32) 85 | label = np.asarray(Image.open(label_path), dtype=np.int32) 86 | return image_id, image, label 87 | 88 | 89 | if __name__ == "__main__": 90 | import matplotlib 91 | import matplotlib.pyplot as plt 92 | import matplotlib.cm as cm 93 | import torchvision 94 | import yaml 95 | from torchvision.utils import make_grid 96 | from tqdm import tqdm 97 | 98 | kwargs = {"nrow": 10, "padding": 50} 99 | batch_size = 100 100 | 101 | dataset = VOCAug( 102 | root="/media/kazuto1011/Extra/VOCdevkit", 103 | split="train_aug", 104 | ignore_label=255, 105 | mean_bgr=(104.008, 116.669, 122.675), 106 | year=2012, 107 | augment=True, 108 | base_size=None, 109 | crop_size=513, 110 | scales=(0.5, 0.75, 1.0, 1.25, 1.5), 111 | flip=True, 112 | ) 113 | print(dataset) 114 | 115 | loader = data.DataLoader(dataset, batch_size=batch_size) 116 | 117 | for i, (image_ids, images, labels) in tqdm( 118 | enumerate(loader), total=np.ceil(len(dataset) / batch_size), leave=False 119 | ): 120 | if i == 0: 121 | mean = torch.tensor((104.008, 116.669, 122.675))[None, :, None, None] 122 | images += mean.expand_as(images) 123 | image = make_grid(images, pad_value=-1, **kwargs).numpy() 124 | image = np.transpose(image, (1, 2, 0)) 125 | mask = np.zeros(image.shape[:2]) 126 | mask[(image != -1)[..., 0]] = 255 127 | image = np.dstack((image, mask)).astype(np.uint8) 128 | 129 | labels = labels[:, np.newaxis, ...] 
130 | label = make_grid(labels, pad_value=255, **kwargs).numpy() 131 | label_ = np.transpose(label, (1, 2, 0))[..., 0].astype(np.float32) 132 | label = cm.jet_r(label_ / 21.0) * 255 133 | mask = np.zeros(label.shape[:2]) 134 | label[..., 3][(label_ == 255)] = 0 135 | label = label.astype(np.uint8) 136 | 137 | tiled_images = np.hstack((image, label)) 138 | # cv2.imwrite("./docs/datasets/voc12.png", tiled_images) 139 | plt.imshow(np.dstack((tiled_images[..., 2::-1], tiled_images[..., 3]))) 140 | plt.show() 141 | break 142 | -------------------------------------------------------------------------------- /utils/Restore.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | __all__ = ['restore'] 5 | 6 | def restore(args, model, optimizer, istrain=True, including_opt=False): 7 | if os.path.isfile(args.restore_from) and ('.pth' in args.restore_from): 8 | snapshot = args.restore_from 9 | else: 10 | restore_dir = args.snapshot_dir 11 | filelist = os.listdir(restore_dir) 12 | filelist = [x for x in filelist if os.path.isfile(os.path.join(restore_dir,x)) and x.endswith('.pth.tar')] 13 | if len(filelist) > 0: 14 | filelist.sort(key=lambda fn:os.path.getmtime(os.path.join(restore_dir, fn)), reverse=True) 15 | snapshot = os.path.join(restore_dir, filelist[0]) 16 | else: 17 | snapshot = '' 18 | 19 | if os.path.isfile(snapshot): 20 | print("=> loading checkpoint '{}'".format(snapshot)) 21 | checkpoint = torch.load(snapshot) 22 | _model_load(model, checkpoint) 23 | #try: 24 | # if istrain: 25 | # args.current_epoch = checkpoint['epoch'] + 1 26 | # args.global_counter = checkpoint['global_counter'] + 1 27 | # if including_opt: 28 | # optimizer.load_state_dict(checkpoint['optimizer']) 29 | # model.load_state_dict(checkpoint['state_dict']) 30 | # print("=> loaded checkpoint '{}' (epoch {})".format(snapshot, checkpoint['epoch'])) 31 | #except KeyError: 32 | # print("KeyError") 33 | # if args.arch=='vgg_v5_7' or args.arch=='vgg_v7' or args.arch=='vgg_v10': 34 | # _model_load_v6(model, checkpoint) 35 | # # elif args.arch=='vgg_v2': 36 | # # _model_load_v2(model, checkpoint) 37 | # else: 38 | # _model_load(model, checkpoint) 39 | #except KeyError: 40 | # print("Loading pre-trained values failed.") 41 | # raise 42 | print("=> loaded checkpoint '{}'".format(snapshot)) 43 | else: 44 | print("=> no checkpoint found at '{}'".format(snapshot)) 45 | 46 | 47 | def _model_load(model, pretrained_dict): 48 | model_dict = model.state_dict() 49 | 50 | # model_dict_keys = [v.replace('module.', '') for v in model_dict.keys() if v.startswith('module.')] 51 | if list(model_dict.keys())[0].startswith('module.'): 52 | pretrained_dict = {'module.'+k: v for k, v in pretrained_dict.items()} 53 | 54 | # print pretrained_dict.keys() 55 | # print model.state_dict().keys() 56 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} 57 | print("Weights cannot be loaded:") 58 | print([k for k in model_dict.keys() if k not in pretrained_dict.keys()]) 59 | 60 | model_dict.update(pretrained_dict) 61 | model.load_state_dict(model_dict) 62 | 63 | def _model_load_v6(model, pretrained_dict): 64 | model_dict = model.state_dict() 65 | 66 | # model_dict_keys = [v.replace('module.', '') for v in model_dict.keys() if v.startswith('module.')] 67 | if model_dict.keys()[0].startswith('module.'): 68 | pretrained_dict = {'module.'+k: v for k, v in pretrained_dict.items()} 69 | 70 | 71 | feature2_pred_w = 
{'module.fc5_seg.%d.weight'%(i):'module.features.%d.weight'%(i+24) for i in range(0,5,2)} 72 | feature2_pred_b = {'module.fc5_seg.%d.bias'%(i):'module.features.%d.bias'%(i+24) for i in range(0,5,2)} 73 | # feature_erase_pred_w = {'module.fc5_seg.%d.weight'%(i):'module.features.%d.weight'%(i+24) for i in range(0,5,2)} 74 | # feature_erase_pred_b = {'module.fc5_seg.%d.bias'%(i):'module.features.%d.bias'%(i+24) for i in range(0,5,2)} 75 | 76 | common_pred = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} 77 | print("Weights cannot be loaded:") 78 | print([k for k in model_dict.keys() if k not in common_pred.keys()+ feature2_pred_w.keys() + feature2_pred_b.keys()]) 79 | 80 | def update_coord_dict(d): 81 | for k in d.keys(): 82 | model_dict[k] = pretrained_dict[d[k]] 83 | 84 | update_coord_dict(feature2_pred_w) 85 | update_coord_dict(feature2_pred_b) 86 | # update_coord_dict(feature_erase_pred_w) 87 | # update_coord_dict(feature_erase_pred_b) 88 | 89 | 90 | model_dict.update(common_pred) 91 | model.load_state_dict(model_dict) 92 | 93 | def _model_load_v2(model, pretrained_dict): 94 | model_dict = model.state_dict() 95 | 96 | # model_dict_keys = [v.replace('module.', '') for v in model_dict.keys() if v.startswith('module.')] 97 | if model_dict.keys()[0].startswith('module.'): 98 | pretrained_dict = {'module.'+k: v for k, v in pretrained_dict.items()} 99 | 100 | 101 | fc5_cls_w = {'module.fc5_cls.%d.weight'%(i):'module.features.%d.weight'%(i+24) for i in range(0,5,2)} 102 | fc5_cls_b = {'module.fc5_cls.%d.bias'%(i):'module.features.%d.bias'%(i+24) for i in range(0,5,2)} 103 | fc5_seg_w = {'module.fc5_seg.%d.weight'%(i):'module.features.%d.weight'%(i+24) for i in range(0,5,2)} 104 | fc5_seg_b = {'module.fc5_seg.%d.bias'%(i):'module.features.%d.bias'%(i+24) for i in range(0,5,2)} 105 | 106 | common_pred = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} 107 | print("Weights cannot be loaded:") 108 | print([k for k in model_dict.keys() if k not in common_pred.keys()+fc5_cls_w.keys()+ 109 | fc5_cls_b.keys() + fc5_seg_w.keys() + fc5_seg_b.keys()]) 110 | 111 | def update_coord_dict(d): 112 | for k in d.keys(): 113 | model_dict[k] = pretrained_dict[d[k]] 114 | 115 | update_coord_dict(fc5_cls_w) 116 | update_coord_dict(fc5_cls_b) 117 | update_coord_dict(fc5_seg_w) 118 | update_coord_dict(fc5_seg_b) 119 | 120 | 121 | model_dict.update(common_pred) 122 | model.load_state_dict(model_dict) 123 | -------------------------------------------------------------------------------- /deeplab-pytorch/data/datasets/cocostuff/cocostuff_hierarchy.yaml: -------------------------------------------------------------------------------- 1 | things: 2 | indoor-super-things: 3 | appliance-things: 4 | - microwave 5 | - oven 6 | - toaster 7 | - sink 8 | - refrigerator 9 | - blender 10 | electronic-things: 11 | - tv 12 | - laptop 13 | - mouse 14 | - remote 15 | - keyboard 16 | - cell phone 17 | food-things: 18 | - banana 19 | - apple 20 | - sandwich 21 | - orange 22 | - broccoli 23 | - carrot 24 | - hot dog 25 | - pizza 26 | - donut 27 | - cake 28 | furniture-things: 29 | - chair 30 | - couch 31 | - potted plant 32 | - bed 33 | - mirror 34 | - dining table 35 | - window 36 | - desk 37 | - toilet 38 | - door 39 | indoor-things: 40 | - book 41 | - clock 42 | - vase 43 | - scissors 44 | - teddy bear 45 | - hair drier 46 | - toothbrush 47 | - hair brush 48 | kitchen-things: 49 | - bottle 50 | - plate 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | 
outdoor-super-things: 58 | accessory-things: 59 | - hat 60 | - backpack 61 | - umbrella 62 | - shoe 63 | - eye glasses 64 | - handbag 65 | - tie 66 | - suitcase 67 | animal-things: 68 | - bird 69 | - cat 70 | - dog 71 | - horse 72 | - sheep 73 | - cow 74 | - elephant 75 | - bear 76 | - zebra 77 | - giraffe 78 | outdoor-things: 79 | - traffic light 80 | - fire hydrant 81 | - street sign 82 | - stop sign 83 | - parking meter 84 | - bench 85 | person-things: 86 | - person 87 | sports-things: 88 | - frisbee 89 | - skis 90 | - snowboard 91 | - sports ball 92 | - kite 93 | - baseball bat 94 | - baseball glove 95 | - skateboard 96 | - surfboard 97 | - tennis racket 98 | vehicle-things: 99 | - bicycle 100 | - car 101 | - motorcycle 102 | - airplane 103 | - bus 104 | - train 105 | - truck 106 | - boat 107 | stuff: 108 | indoor-super-stuff: 109 | ceiling-stuff: 110 | - ceiling-tile 111 | - ceiling-other 112 | floor-stuff: 113 | - carpet 114 | - floor-tile 115 | - floor-wood 116 | - floor-marble 117 | - floor-stone 118 | - floor-other 119 | food-stuff: 120 | - fruit 121 | - salad 122 | - vegetable 123 | - food-other 124 | furniture-stuff: 125 | - door-stuff 126 | - desk-stuff 127 | - table 128 | - shelf 129 | - cabinet 130 | - cupboard 131 | - mirror-stuff 132 | - counter 133 | - light 134 | - stairs 135 | - furniture-other 136 | rawmaterial-stuff: 137 | - cardboard 138 | - paper 139 | - plastic 140 | - metal 141 | textile-stuff: 142 | - rug 143 | - mat 144 | - towel 145 | - napkin 146 | - clothes 147 | - cloth 148 | - curtain 149 | - blanket 150 | - pillow 151 | - banner 152 | - textile-other 153 | wall-stuff: 154 | - wall-tile 155 | - wall-panel 156 | - wall-wood 157 | - wall-brick 158 | - wall-stone 159 | - wall-concrete 160 | - wall-other 161 | window-stuff: 162 | - window-blind 163 | - window-other 164 | outdoor-super-stuff: 165 | building-stuff: 166 | - house 167 | - skyscraper 168 | - bridge 169 | - tent 170 | - roof 171 | - building-other 172 | ground-stuff: 173 | - sand 174 | - snow 175 | - dirt 176 | - mud 177 | - gravel 178 | - road 179 | - pavement 180 | - railroad 181 | - platform 182 | - playingfield 183 | - ground-other 184 | plant-stuff: 185 | - grass 186 | - tree 187 | - bush 188 | - leaves 189 | - flower 190 | - branch 191 | - moss 192 | - straw 193 | - plant-other 194 | sky-stuff: 195 | - clouds 196 | - sky-other 197 | solid-stuff: 198 | - wood 199 | - rock 200 | - stone 201 | - mountain 202 | - hill 203 | - solid-other 204 | structural-stuff: 205 | - fence 206 | - cage 207 | - net 208 | - railing 209 | - structural-other 210 | water-stuff: 211 | - fog 212 | - river 213 | - sea 214 | - waterdrops 215 | - water-other -------------------------------------------------------------------------------- /deeplab-pytorch/libs/datasets/cocostuff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-10-30 7 | 8 | from __future__ import absolute_import, print_function 9 | 10 | import os.path as osp 11 | from glob import glob 12 | 13 | import cv2 14 | import numpy as np 15 | import scipy.io as sio 16 | import torch 17 | from PIL import Image 18 | from torch.utils import data 19 | 20 | from .base import _BaseDataset 21 | 22 | 23 | class CocoStuff10k(_BaseDataset): 24 | """COCO-Stuff 10k dataset""" 25 | 26 | def __init__(self, warp_image=True, **kwargs): 27 | self.warp_image = warp_image 28 | super(CocoStuff10k, 
self).__init__(**kwargs) 29 | 30 | def _set_files(self): 31 | # Create data list via {train, test, all}.txt 32 | if self.split in ["train", "test", "all"]: 33 | file_list = osp.join(self.root, "imageLists", self.split + ".txt") 34 | file_list = tuple(open(file_list, "r")) 35 | file_list = [id_.rstrip() for id_ in file_list] 36 | self.files = file_list 37 | else: 38 | raise ValueError("Invalid split name: {}".format(self.split)) 39 | 40 | def _load_data(self, index): 41 | # Set paths 42 | image_id = self.files[index] 43 | image_path = osp.join(self.root, "images", image_id + ".jpg") 44 | label_path = osp.join(self.root, "annotations", image_id + ".mat") 45 | # Load an image and label 46 | image = cv2.imread(image_path, cv2.IMREAD_COLOR).astype(np.float32) 47 | label = sio.loadmat(label_path)["S"] 48 | label -= 1 # unlabeled (0 -> -1) 49 | label[label == -1] = 255 50 | # Warping: this is just for reproducing the official scores on GitHub 51 | if self.warp_image: 52 | image = cv2.resize(image, (513, 513), interpolation=cv2.INTER_LINEAR) 53 | label = Image.fromarray(label).resize((513, 513), resample=Image.NEAREST) 54 | label = np.asarray(label) 55 | return image_id, image, label 56 | 57 | 58 | class CocoStuff164k(_BaseDataset): 59 | """COCO-Stuff 164k dataset""" 60 | 61 | def __init__(self, **kwargs): 62 | super(CocoStuff164k, self).__init__(**kwargs) 63 | 64 | def _set_files(self): 65 | # Create data list by parsing the "images" folder 66 | if self.split in ["train2017", "val2017"]: 67 | file_list = sorted(glob(osp.join(self.root, "images", self.split, "*.jpg"))) 68 | assert len(file_list) > 0, "{} has no image".format( 69 | osp.join(self.root, "images", self.split) 70 | ) 71 | file_list = [f.split("/")[-1].replace(".jpg", "") for f in file_list] 72 | self.files = file_list 73 | else: 74 | raise ValueError("Invalid split name: {}".format(self.split)) 75 | 76 | def _load_data(self, index): 77 | # Set paths 78 | image_id = self.files[index] 79 | image_path = osp.join(self.root, "images", self.split, image_id + ".jpg") 80 | label_path = osp.join(self.root, "annotations", self.split, image_id + ".png") 81 | # Load an image and label 82 | image = cv2.imread(image_path, cv2.IMREAD_COLOR).astype(np.float32) 83 | label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE) 84 | return image_id, image, label 85 | 86 | 87 | def get_parent_class(value, dictionary): 88 | # Get parent class with COCO-Stuff hierarchy 89 | for k, v in dictionary.items(): 90 | if isinstance(v, list): 91 | if value in v: 92 | yield k 93 | elif isinstance(v, dict): 94 | if value in list(v.keys()): 95 | yield k 96 | else: 97 | for res in get_parent_class(value, v): 98 | yield res 99 | 100 | 101 | if __name__ == "__main__": 102 | import matplotlib 103 | import matplotlib.pyplot as plt 104 | import matplotlib.cm as cm 105 | import torchvision 106 | import yaml 107 | from torchvision.utils import make_grid 108 | from tqdm import tqdm 109 | 110 | kwargs = {"nrow": 10, "padding": 50} 111 | batch_size = 100 112 | 113 | dataset = CocoStuff164k( 114 | root="/media/kazuto1011/Extra/cocostuff/cocostuff-164k", 115 | split="train2017", 116 | ignore_label=255, 117 | mean_bgr=(104.008, 116.669, 122.675), 118 | augment=True, 119 | crop_size=321, 120 | scales=(0.5, 0.75, 1.0, 1.25, 1.5), 121 | flip=True, 122 | ) 123 | print(dataset) 124 | 125 | loader = data.DataLoader(dataset, batch_size=batch_size) 126 | 127 | for i, (image_ids, images, labels) in tqdm( 128 | enumerate(loader), total=np.ceil(len(dataset) / batch_size), leave=False 129 | ): 130 | if 
i == 0: 131 | mean = torch.tensor((104.008, 116.669, 122.675))[None, :, None, None] 132 | images += mean.expand_as(images) 133 | image = make_grid(images, pad_value=-1, **kwargs).numpy() 134 | image = np.transpose(image, (1, 2, 0)) 135 | mask = np.zeros(image.shape[:2]) 136 | mask[(image != -1)[..., 0]] = 255 137 | image = np.dstack((image, mask)).astype(np.uint8) 138 | 139 | labels = labels[:, np.newaxis, ...] 140 | label = make_grid(labels, pad_value=255, **kwargs).numpy() 141 | label_ = np.transpose(label, (1, 2, 0))[..., 0].astype(np.float32) 142 | label = cm.jet_r(label_ / 182.0) * 255 143 | mask = np.zeros(label.shape[:2]) 144 | label[..., 3][(label_ == 255)] = 0 145 | label = label.astype(np.uint8) 146 | 147 | tiled_images = np.hstack((image, label)) 148 | # cv2.imwrite("./docs/datasets/cocostuff.png", tiled_images) 149 | plt.imshow(np.dstack((tiled_images[..., 2::-1], tiled_images[..., 3]))) 150 | plt.show() 151 | break 152 | 153 | class_hierarchy = "./data/datasets/cocostuff/cocostuff_hierarchy.yaml" 154 | data = yaml.load(open(class_hierarchy)) 155 | key = "person" 156 | 157 | for _ in range(3): 158 | key = get_parent_class(key, data) 159 | key = list(key)[0] 160 | print(key) 161 | -------------------------------------------------------------------------------- /utils/torchutils.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from torch.utils.data import Dataset 4 | from PIL import Image 5 | import os.path 6 | import random 7 | import numpy as np 8 | from tool import imutils 9 | 10 | class PolyOptimizer(torch.optim.SGD): 11 | 12 | def __init__(self, params, lr, weight_decay, max_step, momentum=0.9): 13 | super().__init__(params, lr, weight_decay) 14 | 15 | self.global_step = 0 16 | self.max_step = max_step 17 | self.momentum = momentum 18 | 19 | self.__initial_lr = [group['lr'] for group in self.param_groups] 20 | 21 | 22 | def step(self, closure=None): 23 | 24 | if self.global_step < self.max_step: 25 | lr_mult = (1 - self.global_step / self.max_step) ** self.momentum 26 | 27 | for i in range(len(self.param_groups)): 28 | self.param_groups[i]['lr'] = self.__initial_lr[i] * lr_mult 29 | super().step(closure) 30 | 31 | self.global_step += 1 32 | 33 | 34 | class BatchNorm2dFixed(torch.nn.Module): 35 | 36 | def __init__(self, num_features, eps=1e-5): 37 | super(BatchNorm2dFixed, self).__init__() 38 | self.num_features = num_features 39 | self.eps = eps 40 | self.weight = torch.nn.Parameter(torch.Tensor(num_features)) 41 | self.bias = torch.nn.Parameter(torch.Tensor(num_features)) 42 | self.register_buffer('running_mean', torch.zeros(num_features)) 43 | self.register_buffer('running_var', torch.ones(num_features)) 44 | 45 | 46 | def forward(self, input): 47 | 48 | return F.batch_norm( 49 | input, self.running_mean, self.running_var, self.weight, self.bias, 50 | False, eps=self.eps) 51 | 52 | def __call__(self, x): 53 | return self.forward(x) 54 | 55 | 56 | class SegmentationDataset(Dataset): 57 | def __init__(self, img_name_list_path, img_dir, label_dir, rescale=None, flip=False, cropsize=None, 58 | img_transform=None, mask_transform=None): 59 | self.img_name_list_path = img_name_list_path 60 | self.img_dir = img_dir 61 | self.label_dir = label_dir 62 | 63 | self.img_transform = img_transform 64 | self.mask_transform = mask_transform 65 | 66 | self.img_name_list = open(self.img_name_list_path).read().splitlines() 67 | 68 | self.rescale = rescale 69 | self.flip = flip 70 | self.cropsize = cropsize 71 | 72 | def 
__len__(self): 73 | return len(self.img_name_list) 74 | 75 | def __getitem__(self, idx): 76 | 77 | name = self.img_name_list[idx] 78 | 79 | img = Image.open(os.path.join(self.img_dir, name + '.jpg')).convert("RGB") 80 | mask = Image.open(os.path.join(self.label_dir, name + '.png')) 81 | 82 | if self.rescale is not None: 83 | s = self.rescale[0] + random.random() * (self.rescale[1] - self.rescale[0]) 84 | adj_size = (round(img.size[0]*s/8)*8, round(img.size[1]*s/8)*8) 85 | img = img.resize(adj_size, resample=Image.CUBIC) 86 | mask = img.resize(adj_size, resample=Image.NEAREST) 87 | 88 | if self.img_transform is not None: 89 | img = self.img_transform(img) 90 | if self.mask_transform is not None: 91 | mask = self.mask_transform(mask) 92 | 93 | if self.cropsize is not None: 94 | img, mask = imutils.random_crop([img, mask], self.cropsize, (0, 255)) 95 | 96 | mask = imutils.RescaleNearest(0.125)(mask) 97 | 98 | if self.flip is True and bool(random.getrandbits(1)): 99 | img = np.flip(img, 1).copy() 100 | mask = np.flip(mask, 1).copy() 101 | 102 | img = np.transpose(img, (2, 0, 1)) 103 | 104 | return name, img, mask 105 | 106 | 107 | class ExtractAffinityLabelInRadius(): 108 | 109 | def __init__(self, cropsize, radius=5): 110 | self.radius = radius 111 | 112 | self.search_dist = [] 113 | 114 | for x in range(1, radius): 115 | self.search_dist.append((0, x)) 116 | 117 | for y in range(1, radius): 118 | for x in range(-radius+1, radius): 119 | if x*x + y*y < radius*radius: 120 | self.search_dist.append((y, x)) 121 | 122 | self.radius_floor = radius-1 123 | 124 | self.crop_height = cropsize - self.radius_floor 125 | self.crop_width = cropsize - 2 * self.radius_floor 126 | return 127 | 128 | def __call__(self, label): 129 | 130 | labels_from = label[:-self.radius_floor, self.radius_floor:-self.radius_floor] 131 | labels_from = np.reshape(labels_from, [-1]) 132 | 133 | labels_to_list = [] 134 | valid_pair_list = [] 135 | 136 | for dy, dx in self.search_dist: 137 | labels_to = label[dy:dy+self.crop_height, self.radius_floor+dx:self.radius_floor+dx+self.crop_width] 138 | labels_to = np.reshape(labels_to, [-1]) 139 | 140 | valid_pair = np.logical_and(np.less(labels_to, 255), np.less(labels_from, 255)) 141 | 142 | labels_to_list.append(labels_to) 143 | valid_pair_list.append(valid_pair) 144 | 145 | bc_labels_from = np.expand_dims(labels_from, 0) 146 | concat_labels_to = np.stack(labels_to_list) 147 | concat_valid_pair = np.stack(valid_pair_list) 148 | 149 | pos_affinity_label = np.equal(bc_labels_from, concat_labels_to) 150 | 151 | bg_pos_affinity_label = np.logical_and(pos_affinity_label, np.equal(bc_labels_from, 0)).astype(np.float32) 152 | 153 | fg_pos_affinity_label = np.logical_and(np.logical_and(pos_affinity_label, np.not_equal(bc_labels_from, 0)), concat_valid_pair).astype(np.float32) 154 | 155 | neg_affinity_label = np.logical_and(np.logical_not(pos_affinity_label), concat_valid_pair).astype(np.float32) 156 | 157 | return bg_pos_affinity_label, fg_pos_affinity_label, neg_affinity_label 158 | 159 | class AffinityFromMaskDataset(SegmentationDataset): 160 | def __init__(self, img_name_list_path, img_dir, label_dir, rescale=None, flip=False, cropsize=None, 161 | img_transform=None, mask_transform=None, radius=5): 162 | super().__init__(img_name_list_path, img_dir, label_dir, rescale, flip, cropsize, img_transform, mask_transform) 163 | 164 | self.radius = radius 165 | 166 | self.extract_aff_lab_func = ExtractAffinityLabelInRadius(cropsize=cropsize//8, radius=radius) 167 | 168 | def __getitem__(self, 
idx): 169 | name, img, mask = super().__getitem__(idx) 170 | 171 | aff_label = self.extract_aff_lab_func(mask) 172 | 173 | return name, img, aff_label 174 | -------------------------------------------------------------------------------- /scripts/train_iam.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.getcwd()) 4 | 5 | import torch 6 | import argparse 7 | import time 8 | import shutil 9 | import json 10 | import my_optim 11 | import torch.optim as optim 12 | from models import vgg1 13 | import torch.nn as nn 14 | from torchvision import transforms 15 | from torch.utils.data import DataLoader 16 | import torch.nn.functional as F 17 | from torch.autograd import Variable 18 | from utils import AverageMeter 19 | from utils.LoadData import train_data_loader_iam 20 | from tqdm import trange, tqdm 21 | 22 | 23 | def get_arguments(): 24 | parser = argparse.ArgumentParser(description='The Pytorch code of OAA') 25 | parser.add_argument("--img_dir", type=str, default='', help='Directory of training images') 26 | parser.add_argument("--train_list", type=str, default='None') 27 | parser.add_argument("--test_list", type=str, default='None') 28 | parser.add_argument("--batch_size", type=int, default=20) 29 | parser.add_argument("--iter_size", type=int, default=5) 30 | parser.add_argument("--input_size", type=int, default=256) 31 | parser.add_argument("--crop_size", type=int, default=224) 32 | parser.add_argument("--dataset", type=str, default='imagenet') 33 | parser.add_argument("--num_classes", type=int, default=20) 34 | parser.add_argument("--lr", type=float, default=0.001) 35 | parser.add_argument("--weight_decay", type=float, default=0.0005) 36 | parser.add_argument("--decay_points", type=str, default='61') 37 | parser.add_argument("--epoch", type=int, default=15) 38 | parser.add_argument("--num_workers", type=int, default=20) 39 | parser.add_argument("--disp_interval", type=int, default=100) 40 | parser.add_argument("--snapshot_dir", type=str, default='') 41 | parser.add_argument("--resume", type=str, default='False') 42 | parser.add_argument("--global_counter", type=int, default=0) 43 | parser.add_argument("--current_epoch", type=int, default=0) 44 | parser.add_argument("--att_dir", type=str, default='./runs/exp8/') 45 | 46 | return parser.parse_args() 47 | 48 | class ExLoss(nn.Module): 49 | def __init__(self): 50 | super(ExLoss, self).__init__() 51 | 52 | def forward(self, input, target): 53 | assert(input.size() == target.size()) 54 | pos = torch.gt(target, 0.001) 55 | neg = torch.le(target, 0.001) 56 | pos_loss = -target[pos] * torch.log(torch.sigmoid(input[pos])) 57 | neg_loss = -torch.log(1 - torch.sigmoid(input[neg]) + 1e-8) 58 | 59 | loss = 0.0 60 | num_pos = torch.sum(pos) 61 | num_neg = torch.sum(neg) 62 | # print(num_pos, num_neg) 63 | if num_pos > 0: 64 | loss += 1.0 / num_pos.float() * torch.sum(pos_loss) 65 | if num_neg > 0: 66 | loss += 1.0 / num_neg.float() * torch.sum(neg_loss) 67 | 68 | return loss 69 | 70 | 71 | def save_checkpoint(args, state, is_best, filename='checkpoint.pth.tar'): 72 | savepath = os.path.join(args.snapshot_dir, filename) 73 | torch.save(state, savepath) 74 | if is_best: 75 | shutil.copyfile(savepath, os.path.join(args.snapshot_dir, 'model_best.pth.tar')) 76 | 77 | def get_model(args): 78 | model = vgg1.vgg16(pretrained=True, num_classes=args.num_classes) 79 | model = torch.nn.DataParallel(model).cuda() 80 | param_groups = model.module.get_parameter_groups() 81 | 
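    # get_parameter_groups() returns four groups: weights and biases of the
    # pretrained VGG layers, and weights and biases of the extra (randomly
    # initialized) convolutions. The optimizer below trains the backbone at the
    # base LR, its biases at 2x, and the new head at 10x/20x (DeepLab-style recipe).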
optimizer = optim.SGD([ 82 | {'params': param_groups[0], 'lr': args.lr}, 83 | {'params': param_groups[1], 'lr': 2*args.lr}, 84 | {'params': param_groups[2], 'lr': 10*args.lr}, 85 | {'params': param_groups[3], 'lr': 20*args.lr}], momentum=0.9, weight_decay=args.weight_decay, nesterov=True) 86 | criterion = ExLoss() 87 | return model, optimizer, criterion 88 | 89 | def train(args): 90 | batch_time = AverageMeter() 91 | losses = AverageMeter() 92 | 93 | total_epoch = args.epoch 94 | global_counter = args.global_counter 95 | current_epoch = args.current_epoch 96 | 97 | train_loader = train_data_loader_iam(args) 98 | max_step = total_epoch*len(train_loader) 99 | args.max_step = max_step 100 | print('Max step:', max_step) 101 | 102 | model, optimizer, criterion = get_model(args) 103 | print(model) 104 | model.train() 105 | end = time.time() 106 | 107 | while current_epoch < total_epoch: 108 | model.train() 109 | losses.reset() 110 | batch_time.reset() 111 | res = my_optim.reduce_lr(args, optimizer, current_epoch) 112 | steps_per_epoch = len(train_loader) 113 | flag = 0 114 | 115 | for idx, dat in enumerate(train_loader): 116 | img, label = dat 117 | label = label.cuda(non_blocking=True) 118 | logits = model(img) 119 | 120 | if len(logits.shape) == 1: 121 | logits = logits.reshape(label.shape) 122 | loss_val = criterion(logits, label) 123 | loss_val.backward() 124 | 125 | flag += 1 126 | if flag == args.iter_size: 127 | optimizer.step() 128 | optimizer.zero_grad() 129 | flag = 0 130 | 131 | 132 | losses.update(loss_val.data.item(), img.size()[0]) 133 | batch_time.update(time.time() - end) 134 | end = time.time() 135 | 136 | global_counter += 1 137 | if global_counter % 1000 == 0: 138 | losses.reset() 139 | 140 | if global_counter % args.disp_interval == 0: 141 | print('Epoch: [{}][{}/{}]\t' 142 | 'LR: {:.5f}\t' 143 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format( 144 | current_epoch, global_counter%len(train_loader), len(train_loader), 145 | optimizer.param_groups[0]['lr'], loss=losses)) 146 | 147 | if current_epoch == args.epoch-1: 148 | save_checkpoint(args, 149 | { 150 | 'epoch': current_epoch, 151 | 'global_counter': global_counter, 152 | 'state_dict':model.state_dict(), 153 | 'optimizer':optimizer.state_dict() 154 | }, is_best=False, 155 | filename='%s_epoch_%d.pth' %(args.dataset, current_epoch)) 156 | current_epoch += 1 157 | 158 | if __name__ == '__main__': 159 | args = get_arguments() 160 | print('Running parameters:\n', args) 161 | if not os.path.exists(args.snapshot_dir): 162 | os.makedirs(args.snapshot_dir) 163 | train(args) 164 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.getcwd()) 4 | 5 | import torch 6 | import argparse 7 | import time 8 | import shutil 9 | import json 10 | import my_optim 11 | import torch.optim as optim 12 | from models import vgg 13 | import torch.nn as nn 14 | from torchvision import transforms 15 | from torch.utils.data import DataLoader 16 | import torch.nn.functional as F 17 | from torch.autograd import Variable 18 | from utils import AverageMeter 19 | from utils.LoadData import train_data_loader 20 | from tqdm import trange, tqdm 21 | 22 | 23 | def get_arguments(): 24 | parser = argparse.ArgumentParser(description='The Pytorch code of OAA') 25 | parser.add_argument("--img_dir", type=str, default='', help='Directory of training images') 26 | 
parser.add_argument("--train_list", type=str, default='None') 27 | parser.add_argument("--test_list", type=str, default='None') 28 | parser.add_argument("--batch_size", type=int, default=1) 29 | parser.add_argument("--iter_size", type=int, default=5) 30 | parser.add_argument("--input_size", type=int, default=256) 31 | parser.add_argument("--crop_size", type=int, default=224) 32 | parser.add_argument("--dataset", type=str, default='imagenet') 33 | parser.add_argument("--num_classes", type=int, default=20) 34 | parser.add_argument("--lr", type=float, default=0.001) 35 | parser.add_argument("--weight_decay", type=float, default=0.0005) 36 | parser.add_argument("--decay_points", type=str, default='61') 37 | parser.add_argument("--epoch", type=int, default=15) 38 | parser.add_argument("--num_workers", type=int, default=20) 39 | parser.add_argument("--disp_interval", type=int, default=100) 40 | parser.add_argument("--snapshot_dir", type=str, default='') 41 | parser.add_argument("--resume", type=str, default='False') 42 | parser.add_argument("--global_counter", type=int, default=0) 43 | parser.add_argument("--current_epoch", type=int, default=0) 44 | parser.add_argument("--att_dir", type=str, default='./runs/exp1/att/') 45 | parser.add_argument("--accu_dir", type=str, default='./runs/exp1/accu_att/') 46 | parser.add_argument('--drop_layer', action='store_true') 47 | parser.add_argument('--drop_rate', type=float, default=0.5) 48 | parser.add_argument('--drop_threshold', type=float, default=0.6) 49 | 50 | return parser.parse_args() 51 | 52 | def save_checkpoint(args, state, is_best, filename='checkpoint.pth.tar'): 53 | savepath = os.path.join(args.snapshot_dir, filename) 54 | torch.save(state, savepath) 55 | if is_best: 56 | shutil.copyfile(savepath, os.path.join(args.snapshot_dir, 'model_best.pth.tar')) 57 | 58 | def get_model(args): 59 | model = vgg.vgg16(pretrained=True, num_classes=args.num_classes, att_dir=args.att_dir, accu_dir=args.accu_dir, 60 | training_epoch=args.epoch, drop_layer=args.drop_layer, drop_rate=args.drop_rate, drop_threshold=args.drop_threshold) 61 | model = torch.nn.DataParallel(model).cuda() 62 | param_groups = model.module.get_parameter_groups() 63 | optimizer = optim.SGD([ 64 | {'params': param_groups[0], 'lr': args.lr}, 65 | {'params': param_groups[1], 'lr': 2*args.lr}, 66 | {'params': param_groups[2], 'lr': 10*args.lr}, 67 | {'params': param_groups[3], 'lr': 20*args.lr}], momentum=0.9, weight_decay=args.weight_decay, nesterov=True) 68 | 69 | return model, optimizer 70 | 71 | 72 | def validate(model, val_loader): 73 | 74 | print('\nvalidating ... 
', flush=True, end='') 75 | val_loss = AverageMeter() 76 | model.eval() 77 | 78 | with torch.no_grad(): 79 | for idx, dat in tqdm(enumerate(val_loader)): 80 | img_name, img, label = dat 81 | label = label.cuda(non_blocking=True) 82 | logits = model(img) 83 | if len(logits.shape) == 1: 84 | logits = logits.reshape(label.shape) 85 | loss_val = F.multilabel_soft_margin_loss(logits, label) 86 | val_loss.update(loss_val.data.item(), img.size()[0]) 87 | 88 | print('validating loss:', val_loss.avg) 89 | 90 | def train(args): 91 | batch_time = AverageMeter() 92 | losses = AverageMeter() 93 | 94 | total_epoch = args.epoch 95 | global_counter = args.global_counter 96 | current_epoch = args.current_epoch 97 | 98 | train_loader, val_loader = train_data_loader(args) 99 | max_step = total_epoch*len(train_loader) 100 | args.max_step = max_step 101 | print('Max step:', max_step) 102 | 103 | model, optimizer = get_model(args) 104 | print(model) 105 | model.train() 106 | end = time.time() 107 | 108 | while current_epoch < total_epoch: 109 | model.train() 110 | losses.reset() 111 | batch_time.reset() 112 | res = my_optim.reduce_lr(args, optimizer, current_epoch) 113 | steps_per_epoch = len(train_loader) 114 | 115 | validate(model, val_loader) 116 | index = 0 117 | flag = 0 118 | for idx, dat in enumerate(train_loader): 119 | img_name, img, label = dat 120 | label = label.cuda(non_blocking=True) 121 | 122 | logits = model(img, current_epoch, label, index) 123 | index += args.batch_size 124 | 125 | if len(logits.shape) == 1: 126 | logits = logits.reshape(label.shape) 127 | loss_val = F.multilabel_soft_margin_loss(logits, label) / args.iter_size 128 | loss_val.backward() 129 | 130 | flag += 1 131 | if flag == args.iter_size: 132 | optimizer.step() 133 | optimizer.zero_grad() 134 | flag = 0 135 | 136 | losses.update(loss_val.data.item(), img.size()[0]) 137 | batch_time.update(time.time() - end) 138 | end = time.time() 139 | 140 | global_counter += 1 141 | if global_counter % 1000 == 0: 142 | losses.reset() 143 | 144 | if global_counter % args.disp_interval == 0: 145 | print('Epoch: [{}][{}/{}]\t' 146 | 'LR: {:.5f}\t' 147 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format( 148 | current_epoch, global_counter%len(train_loader), len(train_loader), 149 | optimizer.param_groups[0]['lr'], loss=losses)) 150 | 151 | if current_epoch == args.epoch-1: 152 | save_checkpoint(args, 153 | { 154 | 'epoch': current_epoch, 155 | 'global_counter': global_counter, 156 | 'state_dict':model.state_dict(), 157 | 'optimizer':optimizer.state_dict() 158 | }, is_best=False, 159 | filename='%s_epoch_%d.pth' %(args.dataset, current_epoch)) 160 | current_epoch += 1 161 | 162 | if __name__ == '__main__': 163 | args = get_arguments() 164 | print('Running parameters:\n', args) 165 | if not os.path.exists(args.snapshot_dir): 166 | os.makedirs(args.snapshot_dir) 167 | train(args) 168 | -------------------------------------------------------------------------------- /models/vgg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | import torch.nn.functional as F 5 | import math 6 | import cv2 7 | import numpy as np 8 | import os 9 | import random 10 | 11 | model_urls = {'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth'} 12 | 13 | class VGG(nn.Module): 14 | 15 | def __init__(self, features, num_classes=1000, init_weights=True, att_dir='./runs/', accu_dir='./runs/', 16 | training_epoch=15, drop_layer=False, 
drop_rate=0.0, drop_threshold=0.0): 17 | super(VGG, self).__init__() 18 | self.features = features 19 | self.extra_convs = nn.Sequential( 20 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 21 | nn.ReLU(True), 22 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 23 | nn.ReLU(True), 24 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 25 | nn.ReLU(True), 26 | nn.Conv2d(512,20,1) 27 | ) 28 | self._initialize_weights() 29 | self.training_epoch = training_epoch 30 | self.att_dir = att_dir 31 | self.accu_dir = accu_dir 32 | 33 | self.drop_layer = drop_layer 34 | self.drop_rate = drop_rate 35 | self.drop_threshold = drop_threshold 36 | 37 | if self.drop_layer and not os.path.exists(self.att_dir): 38 | os.makedirs(self.att_dir) 39 | if not os.path.exists(self.accu_dir): 40 | os.makedirs(self.accu_dir) 41 | 42 | 43 | def forward(self, x, epoch=1, label=None, index=None): 44 | h, w = x.shape[-2:] 45 | if self.drop_layer and label!=None: 46 | if random.uniform(0, 1) < self.drop_rate: 47 | ind = torch.nonzero(label) 48 | for i in range(ind.shape[0]): 49 | batch_index, la = ind[i] 50 | att_img_path = '{}/{}_{}.png'.format(self.att_dir, batch_index+index, la) 51 | if os.path.exists(att_img_path): 52 | att = cv2.resize(cv2.imread(att_img_path, 0), (w, h)) / 255.0 53 | x[:, :, att > self.drop_threshold] = 0.0 54 | 55 | x = self.features(x) 56 | x = self.extra_convs(x) 57 | 58 | self.map1 = x.clone().detach() 59 | x = F.avg_pool2d(x, kernel_size=(x.size(2), x.size(3)), padding=0) 60 | x = x.view(-1, 20) 61 | 62 | ### the online attention accumulation process 63 | pre_probs = x.clone().detach() 64 | probs = torch.sigmoid(pre_probs) # compute the prob 65 | pred_inds_sort = torch.argsort(-probs) 66 | 67 | if index != None and epoch > 0: 68 | atts = self.map1 69 | atts[atts < 0] = 0 70 | ind = torch.nonzero(label) 71 | num_labels = torch.sum(label, dim=1).long() 72 | 73 | for i in range(ind.shape[0]): 74 | batch_index, la = ind[i] 75 | pred_ind_select = pred_inds_sort[batch_index, :num_labels[batch_index]] 76 | 77 | accu_map_name = '{}/{}_{}.png'.format(self.accu_dir, batch_index+index, la) 78 | att_map_name = '{}/{}_{}.png'.format(self.att_dir, batch_index+index, la) 79 | att = atts[batch_index, la].cpu().data.numpy() 80 | att = att / (att.max() + 1e-8) * 255 81 | 82 | # if this is the last epoch and the image without any accumulation 83 | if epoch == self.training_epoch - 1 and not os.path.exists(accu_map_name): 84 | cv2.imwrite(accu_map_name, att) 85 | continue 86 | 87 | #naive filter out the low quality attention map with prob 88 | if la not in list(pred_ind_select): 89 | continue 90 | 91 | if not os.path.exists(accu_map_name): 92 | cv2.imwrite(accu_map_name, att) 93 | else: 94 | accu_att = cv2.imread(accu_map_name, 0) 95 | accu_att = np.maximum(accu_att, att) 96 | cv2.imwrite(accu_map_name, accu_att) 97 | 98 | # save current attention maps for oaa drop layer 99 | if self.drop_layer: 100 | cv2.imwrite(att_map_name, att) 101 | 102 | ############################################## 103 | 104 | return x 105 | 106 | def get_heatmaps(self): 107 | return self.map1 108 | 109 | def _initialize_weights(self): 110 | for m in self.modules(): 111 | if isinstance(m, nn.Conv2d): 112 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 113 | m.weight.data.normal_(0, 0.01) 114 | # m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 115 | if m.bias is not None: 116 | m.bias.data.zero_() 117 | elif isinstance(m, nn.BatchNorm2d): 118 | m.weight.data.fill_(1) 119 | m.bias.data.zero_() 120 | elif isinstance(m, nn.Linear): 121 | m.weight.data.normal_(0, 0.01) 122 | m.bias.data.zero_() 123 | 124 | def get_parameter_groups(self): 125 | groups = ([], [], [], []) 126 | 127 | for name, value in self.named_parameters(): 128 | 129 | if 'extra' in name: 130 | if 'weight' in name: 131 | groups[2].append(value) 132 | else: 133 | groups[3].append(value) 134 | else: 135 | if 'weight' in name: 136 | groups[0].append(value) 137 | else: 138 | groups[1].append(value) 139 | return groups 140 | 141 | 142 | def make_layers(cfg, batch_norm=False): 143 | layers = [] 144 | in_channels = 3 145 | for i, v in enumerate(cfg): 146 | if v == 'M': 147 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 148 | elif v == 'N': 149 | layers += [nn.MaxPool2d(kernel_size=3, stride=1, padding=1)] 150 | else: 151 | if i > 13: 152 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, dilation=2, padding=2) 153 | else: 154 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 155 | if batch_norm: 156 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 157 | else: 158 | layers += [conv2d, nn.ReLU(inplace=True)] 159 | in_channels = v 160 | return nn.Sequential(*layers) 161 | 162 | 163 | cfg = { 164 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 165 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 166 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 167 | 'D1': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'N', 512, 512, 512], 168 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 169 | } 170 | 171 | 172 | def vgg16(pretrained=False, **kwargs): 173 | model = VGG(make_layers(cfg['D1']), **kwargs) 174 | if pretrained: 175 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16']), strict=False) 176 | return model 177 | -------------------------------------------------------------------------------- /deeplab-pytorch/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: https://kazuto1011.github.io 6 | # Date: 07 January 2019 7 | 8 | from __future__ import absolute_import, division, print_function 9 | 10 | import click 11 | import cv2 12 | import matplotlib 13 | import matplotlib.cm as cm 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | from omegaconf import OmegaConf 20 | 21 | from libs.models import * 22 | from libs.utils import DenseCRF 23 | 24 | 25 | def get_device(cuda): 26 | cuda = cuda and torch.cuda.is_available() 27 | device = torch.device("cuda" if cuda else "cpu") 28 | if cuda: 29 | current_device = torch.cuda.current_device() 30 | print("Device:", torch.cuda.get_device_name(current_device)) 31 | else: 32 | print("Device: CPU") 33 | return device 34 | 35 | 36 | def get_classtable(CONFIG): 37 | with open(CONFIG.DATASET.LABELS) as f: 38 | classes = {} 39 | for label in f: 40 | label = label.rstrip().split("\t") 41 | classes[int(label[0])] = label[1].split(",")[0] 42 | return classes 43 | 44 | 45 | def setup_postprocessor(CONFIG): 46 | # CRF post-processor 47 | postprocessor = DenseCRF( 48 | iter_max=CONFIG.CRF.ITER_MAX, 49 | 
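        # pos_* values parameterize the Gaussian (smoothness) pairwise term and
        # bi_* values the bilateral (appearance) term; all of them are read from
        # the CRF block of the dataset configuration YAML.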
pos_xy_std=CONFIG.CRF.POS_XY_STD, 50 | pos_w=CONFIG.CRF.POS_W, 51 | bi_xy_std=CONFIG.CRF.BI_XY_STD, 52 | bi_rgb_std=CONFIG.CRF.BI_RGB_STD, 53 | bi_w=CONFIG.CRF.BI_W, 54 | ) 55 | return postprocessor 56 | 57 | 58 | def preprocessing(image, device, CONFIG): 59 | # Resize 60 | scale = CONFIG.IMAGE.SIZE.TEST / max(image.shape[:2]) 61 | image = cv2.resize(image, dsize=None, fx=scale, fy=scale) 62 | raw_image = image.astype(np.uint8) 63 | 64 | # Subtract mean values 65 | image = image.astype(np.float32) 66 | image -= np.array( 67 | [ 68 | float(CONFIG.IMAGE.MEAN.B), 69 | float(CONFIG.IMAGE.MEAN.G), 70 | float(CONFIG.IMAGE.MEAN.R), 71 | ] 72 | ) 73 | 74 | # Convert to torch.Tensor and add "batch" axis 75 | image = torch.from_numpy(image.transpose(2, 0, 1)).float().unsqueeze(0) 76 | image = image.to(device) 77 | 78 | return image, raw_image 79 | 80 | 81 | def inference(model, image, raw_image=None, postprocessor=None): 82 | _, _, H, W = image.shape 83 | 84 | # Image -> Probability map 85 | logits = model(image) 86 | logits = F.interpolate(logits, size=(H, W), mode="bilinear", align_corners=False) 87 | probs = F.softmax(logits, dim=1)[0] 88 | probs = probs.cpu().numpy() 89 | 90 | # Refine the prob map with CRF 91 | if postprocessor and raw_image is not None: 92 | probs = postprocessor(raw_image, probs) 93 | 94 | labelmap = np.argmax(probs, axis=0) 95 | 96 | return labelmap 97 | 98 | 99 | @click.group() 100 | @click.pass_context 101 | def main(ctx): 102 | """ 103 | Demo with a trained model 104 | """ 105 | 106 | print("Mode:", ctx.invoked_subcommand) 107 | 108 | 109 | @main.command() 110 | @click.option( 111 | "-c", 112 | "--config-path", 113 | type=click.File(), 114 | required=True, 115 | help="Dataset configuration file in YAML", 116 | ) 117 | @click.option( 118 | "-m", 119 | "--model-path", 120 | type=click.Path(exists=True), 121 | required=True, 122 | help="PyTorch model to be loaded", 123 | ) 124 | @click.option( 125 | "-i", 126 | "--image-path", 127 | type=click.Path(exists=True), 128 | required=True, 129 | help="Image to be processed", 130 | ) 131 | @click.option( 132 | "--cuda/--cpu", default=True, help="Enable CUDA if available [default: --cuda]" 133 | ) 134 | @click.option("--crf", is_flag=True, show_default=True, help="CRF post-processing") 135 | def single(config_path, model_path, image_path, cuda, crf): 136 | """ 137 | Inference from a single image 138 | """ 139 | 140 | # Setup 141 | CONFIG = OmegaConf.load(config_path) 142 | device = get_device(cuda) 143 | torch.set_grad_enabled(False) 144 | 145 | classes = get_classtable(CONFIG) 146 | postprocessor = setup_postprocessor(CONFIG) if crf else None 147 | 148 | model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES) 149 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage) 150 | model.load_state_dict(state_dict) 151 | model.eval() 152 | model.to(device) 153 | print("Model:", CONFIG.MODEL.NAME) 154 | 155 | # Inference 156 | image = cv2.imread(image_path, cv2.IMREAD_COLOR) 157 | image, raw_image = preprocessing(image, device, CONFIG) 158 | labelmap = inference(model, image, raw_image, postprocessor) 159 | labels = np.unique(labelmap) 160 | 161 | # Show result for each class 162 | rows = np.floor(np.sqrt(len(labels) + 1)) 163 | cols = np.ceil((len(labels) + 1) / rows) 164 | 165 | plt.figure(figsize=(10, 10)) 166 | ax = plt.subplot(rows, cols, 1) 167 | ax.set_title("Input image") 168 | ax.imshow(raw_image[:, :, ::-1]) 169 | ax.axis("off") 170 | 171 | for i, label in enumerate(labels): 172 | mask = 
labelmap == label 173 | ax = plt.subplot(rows, cols, i + 2) 174 | ax.set_title(classes[label]) 175 | ax.imshow(raw_image[..., ::-1]) 176 | ax.imshow(mask.astype(np.float32), alpha=0.5) 177 | ax.axis("off") 178 | 179 | plt.tight_layout() 180 | plt.show() 181 | 182 | 183 | @main.command() 184 | @click.option( 185 | "-c", 186 | "--config-path", 187 | type=click.File(), 188 | required=True, 189 | help="Dataset configuration file in YAML", 190 | ) 191 | @click.option( 192 | "-m", 193 | "--model-path", 194 | type=click.Path(exists=True), 195 | required=True, 196 | help="PyTorch model to be loaded", 197 | ) 198 | @click.option( 199 | "--cuda/--cpu", default=True, help="Enable CUDA if available [default: --cuda]" 200 | ) 201 | @click.option("--crf", is_flag=True, show_default=True, help="CRF post-processing") 202 | @click.option("--camera-id", type=int, default=0, show_default=True, help="Device ID") 203 | def live(config_path, model_path, cuda, crf, camera_id): 204 | """ 205 | Inference from camera stream 206 | """ 207 | 208 | # Setup 209 | CONFIG = OmegaConf.load(config_path) 210 | device = get_device(cuda) 211 | torch.set_grad_enabled(False) 212 | torch.backends.cudnn.benchmark = True 213 | 214 | classes = get_classtable(CONFIG) 215 | postprocessor = setup_postprocessor(CONFIG) if crf else None 216 | 217 | model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES) 218 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage) 219 | model.load_state_dict(state_dict) 220 | model.eval() 221 | model.to(device) 222 | print("Model:", CONFIG.MODEL.NAME) 223 | 224 | # UVC camera stream 225 | cap = cv2.VideoCapture(camera_id) 226 | cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"YUYV")) 227 | 228 | def colorize(labelmap): 229 | # Assign a unique color to each label 230 | labelmap = labelmap.astype(np.float32) / CONFIG.DATASET.N_CLASSES 231 | colormap = cm.jet_r(labelmap)[..., :-1] * 255.0 232 | return np.uint8(colormap) 233 | 234 | def mouse_event(event, x, y, flags, labelmap): 235 | # Show a class name of a mouse-overed pixel 236 | label = labelmap[y, x] 237 | name = classes[label] 238 | print(name) 239 | 240 | window_name = "{} + {}".format(CONFIG.MODEL.NAME, CONFIG.DATASET.NAME) 241 | cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE) 242 | 243 | while True: 244 | _, frame = cap.read() 245 | image, raw_image = preprocessing(frame, device, CONFIG) 246 | labelmap = inference(model, image, raw_image, postprocessor) 247 | colormap = colorize(labelmap) 248 | 249 | # Register mouse callback function 250 | cv2.setMouseCallback(window_name, mouse_event, labelmap) 251 | 252 | # Overlay prediction 253 | cv2.addWeighted(colormap, 0.5, raw_image, 0.5, 0.0, raw_image) 254 | 255 | # Quit by pressing "q" key 256 | cv2.imshow(window_name, raw_image) 257 | if cv2.waitKey(10) == ord("q"): 258 | break 259 | 260 | 261 | if __name__ == "__main__": 262 | main() 263 | -------------------------------------------------------------------------------- /utils/imutils.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | import random 3 | import numpy as np 4 | 5 | class RandomResizeLong(): 6 | 7 | def __init__(self, min_long, max_long): 8 | self.min_long = min_long 9 | self.max_long = max_long 10 | 11 | def __call__(self, img): 12 | 13 | target_long = random.randint(self.min_long, self.max_long) 14 | w, h = img.size 15 | 16 | if w < h: 17 | target_shape = (int(round(w * target_long / h)), target_long) 18 | else: 19 | target_shape = 
(target_long, int(round(h * target_long / w))) 20 | 21 | img = img.resize(target_shape, resample=PIL.Image.CUBIC) 22 | return img 23 | 24 | class ResizeShort(): 25 | def __init__(self, short_size): 26 | self.short_size = short_size 27 | 28 | def __call__(self, img): 29 | 30 | target_long = self.short_size 31 | w, h = img.size 32 | 33 | if w < h: 34 | target_shape = (target_long, int(round(h * target_long / w))) 35 | else: 36 | target_shape = (int(round(w * target_long / h)), target_long) 37 | 38 | img = img.resize(target_shape, resample=PIL.Image.CUBIC) 39 | return img 40 | 41 | class RandomCrop(): 42 | 43 | def __init__(self, cropsize): 44 | self.cropsize = cropsize 45 | 46 | def __call__(self, imgarr): 47 | 48 | c, h, w = imgarr.shape 49 | 50 | ch = min(self.cropsize, h) 51 | cw = min(self.cropsize, w) 52 | 53 | w_space = w - self.cropsize 54 | h_space = h - self.cropsize 55 | 56 | if w_space > 0: 57 | cont_left = 0 58 | img_left = random.randrange(w_space+1) 59 | else: 60 | cont_left = random.randrange(-w_space+1) 61 | img_left = 0 62 | 63 | if h_space > 0: 64 | cont_top = 0 65 | img_top = random.randrange(h_space+1) 66 | else: 67 | cont_top = random.randrange(-h_space+1) 68 | img_top = 0 69 | 70 | container = np.zeros((imgarr.shape[0], self.cropsize, self.cropsize), np.float32) 71 | container[:, cont_top:cont_top+ch, cont_left:cont_left+cw] = \ 72 | imgarr[:, img_top:img_top+ch, img_left:img_left+cw] 73 | 74 | return container 75 | 76 | def get_random_crop_box(imgsize, cropsize): 77 | h, w = imgsize 78 | 79 | ch = min(cropsize, h) 80 | cw = min(cropsize, w) 81 | 82 | w_space = w - cropsize 83 | h_space = h - cropsize 84 | 85 | if w_space > 0: 86 | cont_left = 0 87 | img_left = random.randrange(w_space + 1) 88 | else: 89 | cont_left = random.randrange(-w_space + 1) 90 | img_left = 0 91 | 92 | if h_space > 0: 93 | cont_top = 0 94 | img_top = random.randrange(h_space + 1) 95 | else: 96 | cont_top = random.randrange(-h_space + 1) 97 | img_top = 0 98 | 99 | return cont_top, cont_top+ch, cont_left, cont_left+cw, img_top, img_top+ch, img_left, img_left+cw 100 | 101 | def crop_with_box(img, box): 102 | if len(img.shape) == 3: 103 | img_cont = np.zeros((max(box[1]-box[0], box[4]-box[5]), max(box[3]-box[2], box[7]-box[6]), img.shape[-1]), dtype=img.dtype) 104 | else: 105 | img_cont = np.zeros((max(box[1] - box[0], box[4] - box[5]), max(box[3] - box[2], box[7] - box[6])), dtype=img.dtype) 106 | img_cont[box[0]:box[1], box[2]:box[3]] = img[box[4]:box[5], box[6]:box[7]] 107 | return img_cont 108 | 109 | 110 | def random_crop(images, cropsize, fills): 111 | if isinstance(images[0], PIL.Image.Image): 112 | imgsize = images[0].size[::-1] 113 | else: 114 | imgsize = images[0].shape[:2] 115 | box = get_random_crop_box(imgsize, cropsize) 116 | 117 | new_images = [] 118 | for img, f in zip(images, fills): 119 | 120 | if isinstance(img, PIL.Image.Image): 121 | img = img.crop((box[6], box[4], box[7], box[5])) 122 | cont = PIL.Image.new(img.mode, (cropsize, cropsize)) 123 | cont.paste(img, (box[2], box[0])) 124 | new_images.append(cont) 125 | 126 | else: 127 | if len(img.shape) == 3: 128 | cont = np.ones((cropsize, cropsize, img.shape[2]), img.dtype)*f 129 | else: 130 | cont = np.ones((cropsize, cropsize), img.dtype)*f 131 | cont[box[0]:box[1], box[2]:box[3]] = img[box[4]:box[5], box[6]:box[7]] 132 | new_images.append(cont) 133 | 134 | return new_images 135 | 136 | 137 | class AvgPool2d(): 138 | 139 | def __init__(self, ksize): 140 | self.ksize = ksize 141 | 142 | def __call__(self, img): 143 | import 
skimage.measure 144 | 145 | return skimage.measure.block_reduce(img, (self.ksize, self.ksize, 1), np.mean) 146 | 147 | 148 | class RandomHorizontalFlip(): 149 | def __init__(self): 150 | return 151 | 152 | def __call__(self, img): 153 | if bool(random.getrandbits(1)): 154 | img = np.fliplr(img).copy() 155 | return img 156 | 157 | 158 | class CenterCrop(): 159 | 160 | def __init__(self, cropsize, default_value=0): 161 | self.cropsize = cropsize 162 | self.default_value = default_value 163 | 164 | def __call__(self, npimg): 165 | 166 | h, w = npimg.shape[:2] 167 | 168 | ch = min(self.cropsize, h) 169 | cw = min(self.cropsize, w) 170 | 171 | sh = h - self.cropsize 172 | sw = w - self.cropsize 173 | 174 | if sw > 0: 175 | cont_left = 0 176 | img_left = int(round(sw / 2)) 177 | else: 178 | cont_left = int(round(-sw / 2)) 179 | img_left = 0 180 | 181 | if sh > 0: 182 | cont_top = 0 183 | img_top = int(round(sh / 2)) 184 | else: 185 | cont_top = int(round(-sh / 2)) 186 | img_top = 0 187 | 188 | if len(npimg.shape) == 2: 189 | container = np.ones((self.cropsize, self.cropsize), npimg.dtype)*self.default_value 190 | else: 191 | container = np.ones((self.cropsize, self.cropsize, npimg.shape[2]), npimg.dtype)*self.default_value 192 | 193 | container[cont_top:cont_top+ch, cont_left:cont_left+cw] = \ 194 | npimg[img_top:img_top+ch, img_left:img_left+cw] 195 | 196 | return container 197 | 198 | 199 | def HWC_to_CHW(img): 200 | return np.transpose(img, (2, 0, 1)) 201 | 202 | 203 | class RescaleNearest(): 204 | def __init__(self, scale): 205 | self.scale = scale 206 | 207 | def __call__(self, npimg): 208 | import cv2 209 | return cv2.resize(npimg, None, fx=self.scale, fy=self.scale, interpolation=cv2.INTER_NEAREST) 210 | 211 | 212 | def bb_IOU(boxA, boxB): 213 | boxA = [float(aa) for aa in boxA] 214 | boxB = [float(bb) for bb in boxB] 215 | 216 | xA = max(boxA[0], boxB[0]) 217 | yA = max(boxA[1], boxB[1]) 218 | xB = min(boxA[2], boxB[2]) 219 | yB = min(boxA[3], boxB[3]) 220 | 221 | if xA >= xB or yA >= yB: 222 | return 0, 0 223 | # compute the area of intersection rectangle 224 | interArea = (xB - xA + 1) * (yB - yA + 1) 225 | 226 | # compute the area of both the prediction and ground-truth 227 | # rectangles 228 | boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1) 229 | boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1) 230 | 231 | # compute the intersection over union by taking the intersection 232 | # area and dividing it by the sum of prediction + ground-truth 233 | # areas - the interesection area 234 | iou = interArea / float(boxAArea + boxBArea - interArea) 235 | recall = interArea / float(boxAArea) 236 | # return the intersection over union value 237 | return iou, recall 238 | 239 | def large_rect(rect): 240 | # find largest recteangles 241 | large_area = 0 242 | target = 0 243 | for i in range(len(rect)): 244 | area = rect[i][2]*rect[i][3] 245 | if large_area < area: 246 | large_area = area 247 | target = i 248 | 249 | x = rect[target][0] 250 | y = rect[target][1] 251 | w = rect[target][2] 252 | h = rect[target][3] 253 | 254 | return x, y, w, h 255 | 256 | 257 | def crf_inference(img, probs, t=10, scale_factor=1, labels=21): 258 | import pydensecrf.densecrf as dcrf 259 | from pydensecrf.utils import unary_from_softmax 260 | 261 | h, w = img.shape[:2] 262 | n_labels = labels 263 | 264 | d = dcrf.DenseCRF2D(w, h, n_labels) 265 | 266 | unary = unary_from_softmax(probs) 267 | unary = np.ascontiguousarray(unary) 268 | 269 | d.setUnaryEnergy(unary) 270 | 
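    # Pairwise potentials: a Gaussian kernel enforcing spatial smoothness and a
    # bilateral kernel encouraging similarly colored pixels to share a label; the
    # spatial bandwidths are divided by scale_factor so the CRF behaves consistently
    # on rescaled probability maps.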
d.addPairwiseGaussian(sxy=3/scale_factor, compat=3) 271 | d.addPairwiseBilateral(sxy=80/scale_factor, srgb=13, rgbim=np.copy(img), compat=10) 272 | Q = d.inference(t) 273 | 274 | return np.array(Q).reshape((n_labels, h, w)) 275 | -------------------------------------------------------------------------------- /deeplab-pytorch/convert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-11-15 7 | 8 | from __future__ import absolute_import, division, print_function 9 | 10 | import re 11 | import traceback 12 | from collections import Counter, OrderedDict 13 | 14 | import click 15 | import numpy as np 16 | import torch 17 | from addict import Dict 18 | 19 | from libs import caffe_pb2 20 | from libs.models import DeepLabV1_ResNet101, DeepLabV2_ResNet101_MSC 21 | 22 | 23 | def parse_caffemodel(model_path): 24 | caffemodel = caffe_pb2.NetParameter() 25 | with open(model_path, "rb") as f: 26 | caffemodel.MergeFromString(f.read()) 27 | 28 | # Check trainable layers 29 | print( 30 | *Counter( 31 | [(layer.type, len(layer.blobs)) for layer in caffemodel.layer] 32 | ).most_common(), 33 | sep="\n", 34 | ) 35 | 36 | params = OrderedDict() 37 | previous_layer_type = None 38 | for layer in caffemodel.layer: 39 | # Skip the shared branch 40 | if "res075" in layer.name or "res05" in layer.name: 41 | continue 42 | 43 | print( 44 | "\033[34m[Caffe]\033[00m", 45 | "{} ({}): {}".format(layer.name, layer.type, len(layer.blobs)), 46 | ) 47 | 48 | # Convolution or Dilated Convolution 49 | if "Convolution" in layer.type: 50 | params[layer.name] = {} 51 | params[layer.name]["kernel_size"] = layer.convolution_param.kernel_size[0] 52 | params[layer.name]["weight"] = list(layer.blobs[0].data) 53 | if len(layer.blobs) == 2: 54 | params[layer.name]["bias"] = list(layer.blobs[1].data) 55 | if len(layer.convolution_param.stride) == 1: # or [] 56 | params[layer.name]["stride"] = layer.convolution_param.stride[0] 57 | else: 58 | params[layer.name]["stride"] = 1 59 | if len(layer.convolution_param.pad) == 1: # or [] 60 | params[layer.name]["padding"] = layer.convolution_param.pad[0] 61 | else: 62 | params[layer.name]["padding"] = 0 63 | if isinstance(layer.convolution_param.dilation, int): 64 | params[layer.name]["dilation"] = layer.convolution_param.dilation 65 | elif len(layer.convolution_param.dilation) == 1: 66 | params[layer.name]["dilation"] = layer.convolution_param.dilation[0] 67 | else: 68 | params[layer.name]["dilation"] = 1 69 | # Fully-connected 70 | elif "InnerProduct" in layer.type: 71 | params[layer.name] = {} 72 | params[layer.name]["weight"] = list(layer.blobs[0].data) 73 | if len(layer.blobs) == 2: 74 | params[layer.name]["bias"] = list(layer.blobs[1].data) 75 | # Batch Normalization 76 | elif "BatchNorm" in layer.type: 77 | params[layer.name] = {} 78 | params[layer.name]["running_mean"] = ( 79 | np.array(layer.blobs[0].data) / layer.blobs[2].data[0] 80 | ) 81 | params[layer.name]["running_var"] = ( 82 | np.array(layer.blobs[1].data) / layer.blobs[2].data[0] 83 | ) 84 | params[layer.name]["eps"] = layer.batch_norm_param.eps 85 | params[layer.name]["momentum"] = ( 86 | 1 - layer.batch_norm_param.moving_average_fraction 87 | ) 88 | params[layer.name]["num_batches_tracked"] = np.array(0) 89 | batch_norm_layer = layer.name 90 | # Scale 91 | elif "Scale" in layer.type: 92 | assert previous_layer_type == "BatchNorm" 93 | 
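            # Caffe stores batch norm as a "BatchNorm" layer (running statistics)
            # followed by a "Scale" layer (affine weight/bias); fold the Scale blobs
            # into the preceding BatchNorm entry so they map onto a single
            # nn.BatchNorm2d in PyTorch.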
params[batch_norm_layer]["weight"] = list(layer.blobs[0].data) 94 | params[batch_norm_layer]["bias"] = list(layer.blobs[1].data) 95 | elif "Pooling" in layer.type: 96 | params[layer.name] = {} 97 | params[layer.name]["kernel_size"] = layer.pooling_param.kernel_size 98 | params[layer.name]["stride"] = layer.pooling_param.stride 99 | params[layer.name]["padding"] = layer.pooling_param.pad 100 | 101 | previous_layer_type = layer.type 102 | 103 | return params 104 | 105 | 106 | # Hard coded translater 107 | def translate_layer_name(source, target="base"): 108 | def layer_block_branch(source, target): 109 | target += "layer{}".format(source[0][0]) 110 | if len(source[0][1:]) == 1: 111 | block = {"a": 1, "b": 2, "c": 3}.get(source[0][1:]) 112 | else: 113 | block = int(source[0][2:]) + 1 114 | target += ".block{}".format(block) 115 | branch = source[1][6:] 116 | if branch == "1": 117 | target += ".shortcut" 118 | elif branch == "2a": 119 | target += ".reduce" 120 | elif branch == "2b": 121 | target += ".conv3x3" 122 | elif branch == "2c": 123 | target += ".increase" 124 | return target 125 | 126 | source = source.split("_") 127 | 128 | if "pool" in source[0]: 129 | target += "layer1.pool" 130 | elif "fc" in source[0]: 131 | if len(source) == 3: 132 | stage = source[2] 133 | target += "aspp.{}".format(stage) 134 | else: 135 | target += "fc" 136 | elif "conv1" in source[0]: 137 | target += "layer1.conv1.conv" 138 | elif "conv1" in source[1]: 139 | target += "layer1.conv1.bn" 140 | elif "res" in source[0]: 141 | source[0] = source[0].replace("res", "") 142 | target = layer_block_branch(source, target) 143 | target += ".conv" 144 | elif "bn" in source[0]: 145 | source[0] = source[0].replace("bn", "") 146 | target = layer_block_branch(source, target) 147 | target += ".bn" 148 | 149 | return target 150 | 151 | 152 | @click.command() 153 | @click.option( 154 | "-d", 155 | "--dataset", 156 | type=click.Choice(["voc12", "coco"]), 157 | required=True, 158 | help="Caffemodel", 159 | ) 160 | def main(dataset): 161 | """ 162 | Convert caffemodels to pytorch models 163 | """ 164 | 165 | WHITELIST = ["kernel_size", "stride", "padding", "dilation", "eps", "momentum"] 166 | CONFIG = Dict( 167 | { 168 | "voc12": { 169 | # For loading the provided VOC 2012 caffemodel 170 | "PATH_CAFFE_MODEL": "data/models/voc12/deeplabv2_resnet101_msc/caffemodel/train2_iter_20000.caffemodel", 171 | "PATH_PYTORCH_MODEL": "data/models/voc12/deeplabv2_resnet101_msc/caffemodel/deeplabv2_resnet101_msc-vocaug.pth", 172 | "N_CLASSES": 21, 173 | "MODEL": "DeepLabV2_ResNet101_MSC", 174 | "HEAD": "base.", 175 | }, 176 | "coco": { 177 | # For loading the provided initial weights pre-trained on COCO 178 | "PATH_CAFFE_MODEL": "data/models/coco/deeplabv1_resnet101/caffemodel/init.caffemodel", 179 | "PATH_PYTORCH_MODEL": "data/models/coco/deeplabv1_resnet101/caffemodel/deeplabv1_resnet101-coco.pth", 180 | "N_CLASSES": 91, 181 | "MODEL": "DeepLabV1_ResNet101", 182 | "HEAD": "", 183 | }, 184 | }.get(dataset) 185 | ) 186 | 187 | params = parse_caffemodel(CONFIG.PATH_CAFFE_MODEL) 188 | 189 | model = eval(CONFIG.MODEL)(n_classes=CONFIG.N_CLASSES) 190 | model.eval() 191 | reference_state_dict = model.state_dict() 192 | 193 | rel_tol = 1e-7 194 | 195 | converted_state_dict = OrderedDict() 196 | for caffe_layer, caffe_layer_dict in params.items(): 197 | for param_name, caffe_values in caffe_layer_dict.items(): 198 | pytorch_layer = translate_layer_name(caffe_layer, CONFIG.HEAD) 199 | if pytorch_layer: 200 | pytorch_param = pytorch_layer + "." 
+ param_name 201 | 202 | # Parameter check 203 | if param_name in WHITELIST: 204 | pytorch_values = eval("model." + pytorch_param) 205 | if isinstance(pytorch_values, tuple): 206 | assert ( 207 | pytorch_values[0] == caffe_values 208 | ), "Inconsistent values: {} @{} (Caffe), {} @{} (PyTorch)".format( 209 | caffe_values, 210 | caffe_layer + "/" + param_name, 211 | pytorch_values, 212 | pytorch_param, 213 | ) 214 | else: 215 | assert ( 216 | abs(pytorch_values - caffe_values) < rel_tol 217 | ), "Inconsistent values: {} @{} (Caffe), {} @{} (PyTorch)".format( 218 | caffe_values, 219 | caffe_layer + "/" + param_name, 220 | pytorch_values, 221 | pytorch_param, 222 | ) 223 | print( 224 | "\033[34m[Passed!]\033[00m", 225 | (caffe_layer + "/" + param_name).ljust(35), 226 | "->", 227 | pytorch_param, 228 | ) 229 | continue 230 | 231 | # Weight conversion 232 | if pytorch_param in reference_state_dict: 233 | caffe_values = torch.tensor(caffe_values) 234 | caffe_values = caffe_values.view_as( 235 | reference_state_dict[pytorch_param] 236 | ) 237 | converted_state_dict[pytorch_param] = caffe_values 238 | print( 239 | "\033[32m[Copied!]\033[00m", 240 | (caffe_layer + "/" + param_name).ljust(35), 241 | "->", 242 | pytorch_param, 243 | ) 244 | 245 | print("\033[32mVerify the converted model\033[00m") 246 | model.load_state_dict(converted_state_dict) 247 | 248 | print('Saving to "{}"'.format(CONFIG.PATH_PYTORCH_MODEL)) 249 | torch.save(converted_state_dict, CONFIG.PATH_PYTORCH_MODEL) 250 | 251 | 252 | if __name__ == "__main__": 253 | main() 254 | -------------------------------------------------------------------------------- /utils/LoadData.py: -------------------------------------------------------------------------------- 1 | # from torchvision import transforms 2 | from .transforms import transforms 3 | from torch.utils.data import DataLoader 4 | import torchvision 5 | import torch 6 | import numpy as np 7 | from torch.utils.data import Dataset 8 | from .imutils import ResizeShort 9 | import os 10 | from PIL import Image 11 | import random 12 | 13 | def train_data_loader(args, test_path=False, segmentation=False): 14 | if 'coco' in args.dataset: 15 | mean_vals = [0.471, 0.448, 0.408] 16 | std_vals = [0.234, 0.239, 0.242] 17 | else: 18 | mean_vals = [0.485, 0.456, 0.406] 19 | std_vals = [0.229, 0.224, 0.225] 20 | 21 | input_size = int(args.input_size) 22 | crop_size = int(args.crop_size) 23 | tsfm_train = transforms.Compose([#transforms.Resize(input_size), 24 | ResizeShort(224), 25 | transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), 26 | transforms.ToTensor(), 27 | transforms.Normalize(mean_vals, std_vals), 28 | ]) 29 | 30 | tsfm_test = transforms.Compose([ResizeShort(224), 31 | transforms.ToTensor(), 32 | transforms.Normalize(mean_vals, std_vals), 33 | ]) 34 | 35 | img_train = VOCDataset(args.train_list, root_dir=args.img_dir, num_classes=args.num_classes, transform=tsfm_train, test=True) 36 | img_test = VOCDataset(args.test_list, root_dir=args.img_dir, num_classes=args.num_classes, transform=tsfm_test, test=True) 37 | 38 | train_loader = DataLoader(img_train, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 39 | val_loader = DataLoader(img_test, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 40 | 41 | return train_loader, val_loader 42 | 43 | def test_data_loader(args, test_path=False, segmentation=False): 44 | if 'coco' in args.dataset: 45 | mean_vals = [0.471, 0.448, 0.408] 46 | std_vals = [0.234, 0.239, 0.242] 47 | 
else: 48 | mean_vals = [0.485, 0.456, 0.406] 49 | std_vals = [0.229, 0.224, 0.225] 50 | 51 | input_size = int(args.input_size) 52 | 53 | tsfm_test = transforms.Compose([#transforms.Resize(input_size), 54 | ResizeShort(224), 55 | transforms.ToTensor(), 56 | transforms.Normalize(mean_vals, std_vals), 57 | ]) 58 | 59 | img_test = VOCDataset(args.test_list, root_dir=args.img_dir, num_classes=args.num_classes, transform=tsfm_test, test=True) 60 | val_loader = DataLoader(img_test, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 61 | 62 | return val_loader 63 | 64 | def test_msf_data_loader(args, test_path=False, segmentation=False): 65 | if 'coco' in args.dataset: 66 | mean_vals = [0.471, 0.448, 0.408] 67 | std_vals = [0.234, 0.239, 0.242] 68 | else: 69 | mean_vals = [0.485, 0.456, 0.406] 70 | std_vals = [0.229, 0.224, 0.225] 71 | 72 | 73 | input_size = int(args.input_size) 74 | crop_size = int(args.crop_size) 75 | tsfm_test = transforms.Compose([transforms.Resize(input_size), 76 | transforms.ToTensor(), 77 | transforms.Normalize(mean_vals, std_vals), 78 | ]) 79 | 80 | img_test = VOCDatasetMSF(args.test_list, root_dir=args.img_dir, num_classes=args.num_classes, scales=args.scales, transform=tsfm_test, test=True) 81 | val_loader = DataLoader(img_test, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 82 | 83 | return val_loader 84 | 85 | class VOCDataset(Dataset): 86 | def __init__(self, datalist_file, root_dir, num_classes=20, transform=None, test=False): 87 | self.root_dir = root_dir 88 | self.testing = test 89 | self.datalist_file = datalist_file 90 | self.transform = transform 91 | self.num_classes = num_classes 92 | self.image_list, self.label_list = self.read_labeled_image_list(self.root_dir, self.datalist_file) 93 | 94 | def __len__(self): 95 | return len(self.image_list) 96 | 97 | def __getitem__(self, idx): 98 | img_name = self.image_list[idx] 99 | image = Image.open(img_name).convert('RGB') 100 | 101 | if self.transform is not None: 102 | image = self.transform(image) 103 | if self.testing: 104 | return img_name, image, self.label_list[idx] 105 | 106 | return image, self.label_list[idx] 107 | 108 | def read_labeled_image_list(self, data_dir, data_list): 109 | with open(data_list, 'r') as f: 110 | lines = f.readlines() 111 | img_name_list = [] 112 | img_labels = [] 113 | for line in lines: 114 | fields = line.strip().split() 115 | image = fields[0] + '.jpg' 116 | labels = np.zeros((self.num_classes,), dtype=np.float32) 117 | for i in range(len(fields)-1): 118 | index = int(fields[i+1]) 119 | labels[index] = 1. 
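            # each list-file line is "<image_id> <class_idx> [<class_idx> ...]",
            # so `labels` becomes a multi-hot vector over num_classes categories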
120 | img_name_list.append(os.path.join(data_dir, image)) 121 | img_labels.append(labels) 122 | return img_name_list, img_labels#, np.array(img_labels, dtype=np.float32) 123 | 124 | ####integral attention model learning###### 125 | 126 | def train_data_loader_iam(args, test_path=False, segmentation=False): 127 | if 'coco' in args.dataset: 128 | mean_vals = [0.471, 0.448, 0.408] 129 | std_vals = [0.234, 0.239, 0.242] 130 | else: 131 | mean_vals = [0.485, 0.456, 0.406] 132 | std_vals = [0.229, 0.224, 0.225] 133 | 134 | input_size = int(args.input_size) 135 | crop_size = int(args.crop_size) 136 | tsfm_train = transforms.Compose([ResizeShort(224), 137 | transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), 138 | transforms.ToTensor(), 139 | transforms.Normalize(mean_vals, std_vals), 140 | ]) 141 | 142 | img_train = VOCDataset_iam(args.train_list, root_dir=args.img_dir, att_dir=args.att_dir, num_classes=args.num_classes, \ 143 | transform=tsfm_train, test=False) 144 | train_loader = DataLoader(img_train, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 145 | 146 | return train_loader 147 | 148 | class VOCDataset_iam(Dataset): 149 | def __init__(self, datalist_file, root_dir, att_dir, num_classes=20, transform=None, test=False): 150 | self.root_dir = root_dir 151 | self.att_dir = att_dir 152 | self.testing = test 153 | self.datalist_file = datalist_file 154 | self.transform = transform 155 | self.num_classes = num_classes 156 | self.image_list, self.label_list, self.label_name_list = \ 157 | self.read_labeled_image_list(self.root_dir, self.att_dir, self.datalist_file) 158 | 159 | def __len__(self): 160 | return len(self.image_list) 161 | 162 | def __getitem__(self, idx): 163 | img_name = self.image_list[idx] 164 | image = Image.open(img_name).convert('RGB') 165 | 166 | im_labels = self.label_list[idx] 167 | im_label_names = self.label_name_list[idx] 168 | tmp = Image.open(im_label_names[0]) 169 | h, w = tmp.size 170 | labels = np.zeros((self.num_classes, w, h), dtype=np.float32) 171 | 172 | for j in range(len(im_label_names)): 173 | label = im_labels[j] 174 | label_name = im_label_names[j] 175 | labels[label] = np.asarray(Image.open(label_name)) 176 | labels /= 255.0 177 | 178 | if self.transform is not None: 179 | image = self.transform(image) 180 | 181 | return image, labels 182 | 183 | def read_labeled_image_list(self, data_dir, att_dir, data_list): 184 | with open(data_list, 'r') as f: 185 | lines = f.readlines() 186 | 187 | img_name_list = [] 188 | label_list = [] 189 | label_name_list = [] 190 | 191 | for i, line in enumerate(lines): 192 | fields = line.strip().split() 193 | image = fields[0] + '.jpg' 194 | img_name_list.append(os.path.join(data_dir, image)) 195 | 196 | im_labels = [] 197 | im_label_names = [] 198 | 199 | for j in range(len(fields)-1): 200 | im_labels.append(int(fields[j+1])) 201 | index = '{}_{}.png'.format(i, fields[j+1]) 202 | im_label_names.append(os.path.join(att_dir, index)) 203 | 204 | label_list.append(im_labels) 205 | label_name_list.append(im_label_names) 206 | 207 | return img_name_list, label_list, label_name_list 208 | 209 | class VOCDatasetMSF(Dataset): 210 | def __init__(self, datalist_file, root_dir, num_classes=20, scales=[0.5, 1, 1.5, 2], transform=None, test=False): 211 | self.root_dir = root_dir 212 | self.testing = test 213 | self.datalist_file = datalist_file 214 | self.scales = scales 215 | self.transform = transform 216 | self.num_classes = num_classes 217 | self.image_list, self.label_list = 
self.read_labeled_image_list(self.root_dir, self.datalist_file) 218 | 219 | def __len__(self): 220 | return len(self.image_list) 221 | 222 | def __getitem__(self, idx): 223 | img_name = self.image_list[idx] 224 | image = Image.open(img_name).convert('RGB') 225 | 226 | ms_img_list = [] 227 | for s in self.scales: 228 | target_size = (int(round(image.size[0]*s)), 229 | int(round(image.size[1]*s))) 230 | s_img = image.resize(target_size, resample=Image.CUBIC) 231 | ms_img_list.append(s_img) 232 | 233 | if self.transform is not None: 234 | for i in range(len(ms_img_list)): 235 | ms_img_list[i] = self.transform(ms_img_list[i]) 236 | 237 | msf_img_list = [] 238 | for i in range(len(ms_img_list)): 239 | msf_img_list.append(ms_img_list[i]) 240 | msf_img_list.append(np.flip(ms_img_list[i], -1).copy()) 241 | 242 | if self.testing: 243 | return img_name, msf_img_list, self.label_list[idx] 244 | 245 | return msf_img_list, self.label_list[idx] 246 | 247 | def read_labeled_image_list(self, data_dir, data_list): 248 | with open(data_list, 'r') as f: 249 | lines = f.readlines() 250 | img_name_list = [] 251 | img_labels = [] 252 | for line in lines: 253 | fields = line.strip().split() 254 | image = fields[0] + '.jpg' 255 | labels = np.zeros((self.num_classes,), dtype=np.float32) 256 | for i in range(len(fields)-1): 257 | index = int(fields[i+1]) 258 | labels[index] = 1. 259 | img_name_list.append(os.path.join(data_dir, image)) 260 | img_labels.append(labels) 261 | return img_name_list, img_labels #np.array(img_labels, dtype=np.float32) 262 | -------------------------------------------------------------------------------- /deeplab-pytorch/README.md: -------------------------------------------------------------------------------- 1 | # DeepLab with PyTorch 2 | 3 | This is an unofficial **PyTorch** implementation of **DeepLab v2** [[1](##references)] with a **ResNet-101** backbone. 4 | * **COCO-Stuff** dataset [[2](##references)] and **PASCAL VOC** dataset [[3]()] are supported. 5 | * The official Caffe weights provided by the authors can be used without building the Caffe APIs. 6 | * DeepLab v3/v3+ models with the identical backbone are also included (not tested). 7 | * [```torch.hub``` is supported](#torchhub). 8 | 9 | ## Performance 10 | 11 | ### COCO-Stuff 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 |
| Train set | Eval set | Code | Weight | CRF? | Pixel Accuracy | Mean Accuracy | Mean IoU | FreqW IoU |
| :--- | :--- | :--- | :--- | :---: | ---: | ---: | ---: | ---: |
| 10k *train* † | 10k *val* | Official [2] | | | 65.1 | 45.5 | 34.4 | 50.4 |
| 10k *train* † | 10k *val* | This repo | Download | | 65.8 | 45.7 | 34.8 | 51.2 |
| 10k *train* † | 10k *val* | This repo | Download | ✓ | 67.1 | 46.4 | 35.6 | 52.5 |
| 164k *train* | 164k *val* | This repo | Download | | 66.8 | 51.2 | 39.1 | 51.5 |
| 164k *train* | 164k *val* | This repo | Download | ✓ | 67.6 | 51.5 | 39.7 | 52.3 |
† Images and labels are pre-warped to a square 513x513 shape.
‡ Note for [SPADE](https://nvlabs.github.io/SPADE/) followers: The provided COCO-Stuff 164k weight has been kept intact since 2019/02/23.

### PASCAL VOC 2012
| Train set | Eval set | Code | Weight | CRF? | Pixel Accuracy | Mean Accuracy | Mean IoU | FreqW IoU |
| :--- | :--- | :--- | :--- | :---: | ---: | ---: | ---: | ---: |
| *trainaug* | *val* | Official [3] | | | - | - | 76.35 | - |
| *trainaug* | *val* | Official [3] | | ✓ | - | - | 77.69 | - |
| *trainaug* | *val* | This repo | Download | | 94.64 | 86.50 | 76.65 | 90.41 |
| *trainaug* | *val* | This repo | Download | ✓ | 95.04 | 86.64 | 77.93 | 91.06 |
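Pixel Accuracy, Mean Accuracy, Mean IoU, and FreqW IoU above are the standard semantic-segmentation scores read off a class-by-class confusion matrix accumulated over the whole evaluation set. The snippet below is a minimal NumPy sketch of how these four numbers are typically computed; the function name and array layout are illustrative and are not this repository's API.

```python
import numpy as np

def segmentation_scores(conf):
    """Derive the four reported scores from an (n_classes, n_classes)
    confusion matrix where conf[gt, pred] counts pixels."""
    conf = conf.astype(np.float64)
    tp = np.diag(conf)                # correctly classified pixels per class
    gt_total = conf.sum(axis=1)       # ground-truth pixels per class
    pred_total = conf.sum(axis=0)     # predicted pixels per class
    with np.errstate(divide="ignore", invalid="ignore"):
        pixel_acc = tp.sum() / conf.sum()
        mean_acc = np.nanmean(tp / gt_total)            # Mean Accuracy
        iou = tp / (gt_total + pred_total - tp)
        mean_iou = np.nanmean(iou)                      # Mean IoU
        freq = gt_total / conf.sum()                    # class frequency
        freqw_iou = np.nansum(freq * iou)               # FreqW IoU
    return pixel_acc, mean_acc, mean_iou, freqw_iou
```

Classes that never appear in the ground truth produce NaN entries and are simply skipped by the `nanmean`/`nansum` reductions.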
130 | 131 | ## Setup 132 | 133 | ### Requirements 134 | 135 | Required Python packages are listed in the Anaconda configuration file `configs/conda_env.yaml`. 136 | Please modify the listed `cudatoolkit=10.2` and `python=3.6` as needed and run the following commands. 137 | 138 | ```sh 139 | # Set up with Anaconda 140 | conda env create -f configs/conda_env.yaml 141 | conda activate deeplab-pytorch 142 | ``` 143 | 144 | ### Download datasets 145 | 146 | * [COCO-Stuff 10k/164k](data/datasets/cocostuff/README.md) 147 | * [PASCAL VOC 2012](data/datasets/voc12/README.md) 148 | 149 | ### Download pre-trained caffemodels 150 | 151 | Caffemodels pre-trained on COCO and PASCAL VOC datasets are released by the DeepLab authors. 152 | In accordance with the papers [[1](##references),[2](##references)], this repository uses the COCO-trained parameters as initial weights. 153 | 154 | 1. Run the following script to download the pre-trained caffemodels (1GB+). 155 | 156 | ```sh 157 | $ bash scripts/setup_caffemodels.sh 158 | ``` 159 | 160 | 2. Convert the caffemodels into PyTorch-compatible weights. No need to build the Caffe API! 161 | 162 | ```sh 163 | # Generate "deeplabv1_resnet101-coco.pth" from "init.caffemodel" 164 | $ python convert.py --dataset coco 165 | # Generate "deeplabv2_resnet101_msc-vocaug.pth" from "train2_iter_20000.caffemodel" 166 | $ python convert.py --dataset voc12 167 | ``` 168 | 169 | ## Training & Evaluation 170 | 171 | To train DeepLab v2 on PASCAL VOC 2012: 172 | 173 | ```sh 174 | python main.py train \ 175 | --config-path configs/voc12.yaml 176 | ``` 177 | 178 | To evaluate the performance on a validation set: 179 | 180 | ```sh 181 | python main.py test \ 182 | --config-path configs/voc12.yaml \ 183 | --model-path data/models/voc12/deeplabv2_resnet101_msc/train_aug/checkpoint_final.pth 184 | ``` 185 | 186 | Note: This command saves the predicted logit maps (`.npy`) and the scores (`.json`). 187 | 188 | To re-evaluate with CRF post-processing:
189 | 190 | ```sh 191 | python main.py crf \ 192 | --config-path configs/voc12.yaml 193 | ``` 194 | 195 | Execution of a series of the above scripts is equivalent to `bash scripts/train_eval.sh`. 196 | 197 | To monitor a loss, run the following command in a separate terminal. 198 | 199 | ```sh 200 | tensorboard --logdir data/logs 201 | ``` 202 | 203 | Please specify the appropriate configuration files for the other datasets. 204 | 205 | | Dataset | Config file | #Iterations | Classes | 206 | | :-------------- | :--------------------------- | :---------- | :--------------------------- | 207 | | PASCAL VOC 2012 | `configs/voc12.yaml` | 20,000 | 20 foreground + 1 background | 208 | | COCO-Stuff 10k | `configs/cocostuff10k.yaml` | 20,000 | 182 thing/stuff | 209 | | COCO-Stuff 164k | `configs/cocostuff164k.yaml` | 100,000 | 182 thing/stuff | 210 | 211 | Note: Although the label indices range from 0 to 181 in COCO-Stuff 10k/164k, only [171 classes](https://github.com/nightrome/cocostuff/blob/master/labels.md) are supervised. 212 | 213 | Common settings: 214 | 215 | - **Model**: DeepLab v2 with ResNet-101 backbone. Dilated rates of ASPP are (6, 12, 18, 24). Output stride is 8. 216 | - **GPU**: All the GPUs visible to the process are used. Please specify the scope with 217 | ```CUDA_VISIBLE_DEVICES=```. 218 | - **Multi-scale loss**: Loss is defined as a sum of responses from multi-scale inputs (1x, 0.75x, 0.5x) and element-wise max across the scales. The *unlabeled* class is ignored in the loss computation. 219 | - **Gradient accumulation**: The mini-batch of 10 samples is not processed at once due to the high occupancy of GPU memories. Instead, gradients of small batches of 5 samples are accumulated for 2 iterations, and weight updating is performed at the end (```batch_size * iter_size = 10```). GPU memory usage is approx. 11.2 GB with the default setting (tested on the single Titan X). You can reduce it with a small ```batch_size```. 220 | - **Learning rate**: Stochastic gradient descent (SGD) is used with momentum of 0.9 and initial learning rate of 2.5e-4. Polynomial learning rate decay is employed; the learning rate is multiplied by ```(1-iter/iter_max)**power``` at every 10 iterations. 221 | - **Monitoring**: Moving average loss (```average_loss``` in Caffe) can be monitored in TensorBoard. 222 | - **Preprocessing**: Input images are randomly re-scaled by factors ranging from 0.5 to 1.5, padded if needed, and randomly cropped to 321x321. 223 | 224 | Processed images and labels in COCO-Stuff 164k: 225 | 226 | ![Data](docs/datasets/cocostuff.png) 227 | 228 | ## Inference Demo 229 | 230 | You can use [the pre-trained models](#performance), [the converted models](#download-pre-trained-caffemodels), or your models. 231 | 232 | To process a single image: 233 | 234 | ```bash 235 | python demo.py single \ 236 | --config-path configs/voc12.yaml \ 237 | --model-path deeplabv2_resnet101_msc-vocaug-20000.pth \ 238 | --image-path image.jpg 239 | ``` 240 | 241 | To run on a webcam: 242 | 243 | ```bash 244 | python demo.py live \ 245 | --config-path configs/voc12.yaml \ 246 | --model-path deeplabv2_resnet101_msc-vocaug-20000.pth 247 | ``` 248 | 249 | To run a CRF post-processing, add `--crf`. To run on a CPU, add `--cpu`. 
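For reference, the polynomial learning-rate decay described under *Common settings* above (LR multiplied by `(1-iter/iter_max)**power`, updated every 10 iterations) can be expressed as an ordinary PyTorch scheduler. The sketch below is a minimal illustration of that schedule; the class name and constructor arguments are assumptions for this example and not necessarily how this codebase implements it.

```python
from torch.optim.lr_scheduler import _LRScheduler

class PolynomialLR(_LRScheduler):
    """Decay each base LR by (1 - iter / iter_max) ** power,
    refreshing the value only every `step_size` iterations."""

    def __init__(self, optimizer, step_size, iter_max, power, last_epoch=-1):
        self.step_size = step_size
        self.iter_max = iter_max
        self.power = power
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        # Between updates (and past iter_max) keep the current LR unchanged.
        if self.last_epoch % self.step_size or self.last_epoch > self.iter_max:
            return [group["lr"] for group in self.optimizer.param_groups]
        factor = (1 - self.last_epoch / self.iter_max) ** self.power
        return [base_lr * factor for base_lr in self.base_lrs]
```

With `step_size=10`, `iter_max=20000`, `power=0.9`, and SGD at a base LR of 2.5e-4 with momentum 0.9, calling `scheduler.step()` once per iteration matches the schedule described above: the learning rate decays smoothly from 2.5e-4 toward zero over the 20,000 iterations.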
250 | 251 | ## Misc 252 | 253 | ### torch.hub 254 | 255 | Model setup with two lines 256 | 257 | ```python 258 | import torch.hub 259 | model = torch.hub.load("kazuto1011/deeplab-pytorch", "deeplabv2_resnet101", pretrained='cocostuff164k', n_classes=182) 260 | ``` 261 | 262 | ### Difference with Caffe version 263 | 264 | * While the official code employs 1/16 bilinear interpolation (```Interp``` layer) for downsampling a label for only 0.5x input, this codebase does for both 0.5x and 0.75x inputs with nearest interpolation (```PIL.Image.resize```, [related issue](https://github.com/kazuto1011/deeplab-pytorch/issues/51)). 265 | * Bilinear interpolation on images and logits is performed with the ```align_corners=False```. 266 | 267 | ### Training batch normalization 268 | 269 | 270 | This codebase only supports DeepLab v2 training which freezes batch normalization layers, although 271 | v3/v3+ protocols require training them. If training their parameters on multiple GPUs as well in your projects, please 272 | install [the extra library](https://hangzhang.org/PyTorch-Encoding/) below. 273 | 274 | ```bash 275 | pip install torch-encoding 276 | ``` 277 | 278 | Batch normalization layers in a model are automatically switched in ```libs/models/resnet.py```. 279 | 280 | ```python 281 | try: 282 | from encoding.nn import SyncBatchNorm 283 | _BATCH_NORM = SyncBatchNorm 284 | except: 285 | _BATCH_NORM = nn.BatchNorm2d 286 | ``` 287 | 288 | ## References 289 | 290 | 1. L.-C. Chen, G. Papandreou, I. Kokkinos, K. Murphy, A. L. Yuille. DeepLab: Semantic Image 291 | Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs. *IEEE TPAMI*, 292 | 2018.
293 | [Project](http://liangchiehchen.com/projects/DeepLab.html) / 294 | [Code](https://bitbucket.org/aquariusjay/deeplab-public-ver2) / [arXiv 295 | paper](https://arxiv.org/abs/1606.00915) 296 | 297 | 2. H. Caesar, J. Uijlings, V. Ferrari. COCO-Stuff: Thing and Stuff Classes in Context. In *CVPR*, 2018.
298 | [Project](https://github.com/nightrome/cocostuff) / [arXiv paper](https://arxiv.org/abs/1612.03716) 299 | 300 | 1. M. Everingham, L. Van Gool, C. K. I. Williams, J. Winn, A. Zisserman. The PASCAL Visual Object 301 | Classes (VOC) Challenge. *IJCV*, 2010.
302 | [Project](http://host.robots.ox.ac.uk/pascal/VOC) / 303 | [Paper](http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham10.pdf) 304 | -------------------------------------------------------------------------------- /utils/datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import numpy as np 4 | import random 5 | import collections 6 | import torch 7 | import torchvision 8 | import cv2 9 | from torch.utils import data 10 | 11 | 12 | class VOCDataSet(data.Dataset): 13 | def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321), mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255): 14 | self.root = root 15 | self.list_path = list_path 16 | self.crop_h, self.crop_w = crop_size 17 | self.scale = scale 18 | self.ignore_label = ignore_label 19 | self.mean = mean 20 | self.is_mirror = mirror 21 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 22 | self.img_ids = [i_id.strip() for i_id in open(list_path)] 23 | #if not max_iters==None: 24 | # self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids))) 25 | self.files = [] 26 | # for split in ["train", "trainval", "val"]: 27 | for name in self.img_ids: 28 | #img_file = osp.join(self.root, "JPEGImages/%s.jpg" % name) 29 | #label_file = osp.join(self.root, "SegmentationClassAug/%s.png" % name) 30 | #img_file = osp.join(self.root, "trainval_image/%s.jpg" % name) 31 | #label_file = osp.join(self.root, "trainval_gt/%s.png" % name) 32 | self.files.append({ 33 | "img": img_file, 34 | "label": label_file, 35 | "name": name 36 | }) 37 | 38 | def __len__(self): 39 | return len(self.files) 40 | 41 | def generate_scale_label(self, image, label): 42 | f_scale = 0.5 + random.randint(0, 11) / 10.0 43 | image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_LINEAR) 44 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_NEAREST) 45 | return image, label 46 | 47 | def __getitem__(self, index): 48 | datafiles = self.files[index] 49 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 50 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 51 | size = image.shape 52 | name = datafiles["name"] 53 | if self.scale: 54 | image, label = self.generate_scale_label(image, label) 55 | image = np.asarray(image, np.float32) 56 | image -= self.mean 57 | img_h, img_w = label.shape 58 | pad_h = max(self.crop_h - img_h, 0) 59 | pad_w = max(self.crop_w - img_w, 0) 60 | if pad_h > 0 or pad_w > 0: 61 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 62 | pad_w, cv2.BORDER_CONSTANT, 63 | value=(0.0, 0.0, 0.0)) 64 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 65 | pad_w, cv2.BORDER_CONSTANT, 66 | value=(self.ignore_label,)) 67 | else: 68 | img_pad, label_pad = image, label 69 | 70 | img_h, img_w = label_pad.shape 71 | h_off = random.randint(0, img_h - self.crop_h) 72 | w_off = random.randint(0, img_w - self.crop_w) 73 | # roi = cv2.Rect(w_off, h_off, self.crop_w, self.crop_h); 74 | image = np.asarray(img_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 75 | label = np.asarray(label_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 76 | #image = image[:, :, ::-1] # change to BGR 77 | image = image.transpose((2, 0, 1)) 78 | if self.is_mirror: 79 | flip = np.random.choice(2) * 2 - 1 80 | image = image[:, :, ::flip] 81 | label = label[:, ::flip] 82 | 83 | return image.copy(), label.copy(), np.array(size), name 
84 | 85 | 86 | class VOCDataTestSet(data.Dataset): 87 | def __init__(self, root, list_path, crop_size=(505, 505), mean=(128, 128, 128)): 88 | self.root = root 89 | self.list_path = list_path 90 | self.crop_h, self.crop_w = crop_size 91 | self.mean = mean 92 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 93 | self.img_ids = [i_id.strip() for i_id in open(list_path)] 94 | self.files = [] 95 | # for split in ["train", "trainval", "val"]: 96 | for name in self.img_ids: 97 | img_file = osp.join(self.root, "JPEGImages/%s.jpg" % name) 98 | label_file = osp.join(self.root, "SegmentationClassAug/%s.png" % name) 99 | self.files.append({ 100 | "img": img_file, 101 | "label": label_file 102 | }) 103 | 104 | def __len__(self): 105 | return len(self.files) 106 | 107 | def __getitem__(self, index): 108 | datafiles = self.files[index] 109 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 110 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 111 | name = osp.splitext(osp.basename(datafiles["img"]))[0] 112 | image = np.asarray(image, np.float32) 113 | 114 | img_h, img_w, _ = image.shape 115 | #max_size = max(img_h, img_w) 116 | #ratio = float(self.crop_h) / float(max_size) 117 | #new_h = int(ratio * img_h) 118 | #new_w = int(ratio * img_w) 119 | #image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC) 120 | size = image.shape 121 | 122 | 123 | image -= self.mean 124 | pad_h = max(self.crop_h - img_h, 0) 125 | pad_w = max(self.crop_w - img_w, 0) 126 | if pad_h > 0 or pad_w > 0: 127 | image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT, value=(0.0, 0.0, 0.0)) 128 | image = image.transpose((2, 0, 1)) 129 | return image, label, np.array(size), name 130 | 131 | class CSDataSet(data.Dataset): 132 | def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321), mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255): 133 | self.root = root 134 | self.list_path = list_path 135 | self.crop_h, self.crop_w = crop_size 136 | self.scale = scale 137 | self.ignore_label = ignore_label 138 | self.mean = mean 139 | self.is_mirror = mirror 140 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 141 | self.img_ids = [i_id.strip().split() for i_id in open(list_path)] 142 | if not max_iters==None: 143 | self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids))) 144 | self.files = [] 145 | # for split in ["train", "trainval", "val"]: 146 | for item in self.img_ids: 147 | image_path, label_path = item 148 | name = osp.splitext(osp.basename(label_path))[0] 149 | img_file = osp.join(self.root, image_path) 150 | label_file = osp.join(self.root, label_path) 151 | self.files.append({ 152 | "img": img_file, 153 | "label": label_file, 154 | "name": name 155 | }) 156 | self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label, 157 | 3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label, 158 | 7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4, 159 | 14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5, 160 | 18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, 161 | 28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18} 162 | print('{} images are loaded!'.format(len(self.img_ids))) 163 | 164 | def __len__(self): 165 | return len(self.files) 166 | 167 | def generate_scale_label(self, image, label): 168 | f_scale = 0.7 + random.randint(0, 14) / 10.0 169 | image = cv2.resize(image, 
None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_LINEAR) 170 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_NEAREST) 171 | return image, label 172 | 173 | def id2trainId(self, label, reverse=False): 174 | label_copy = label.copy() 175 | if reverse: 176 | for v, k in self.id_to_trainid.items(): 177 | label_copy[label == k] = v 178 | else: 179 | for k, v in self.id_to_trainid.items(): 180 | label_copy[label == k] = v 181 | return label_copy 182 | 183 | def __getitem__(self, index): 184 | datafiles = self.files[index] 185 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 186 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 187 | label = self.id2trainId(label) 188 | size = image.shape 189 | name = datafiles["name"] 190 | if self.scale: 191 | image, label = self.generate_scale_label(image, label) 192 | image = np.asarray(image, np.float32) 193 | image -= self.mean 194 | img_h, img_w = label.shape 195 | pad_h = max(self.crop_h - img_h, 0) 196 | pad_w = max(self.crop_w - img_w, 0) 197 | if pad_h > 0 or pad_w > 0: 198 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 199 | pad_w, cv2.BORDER_CONSTANT, 200 | value=(0.0, 0.0, 0.0)) 201 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 202 | pad_w, cv2.BORDER_CONSTANT, 203 | value=(self.ignore_label,)) 204 | else: 205 | img_pad, label_pad = image, label 206 | 207 | img_h, img_w = label_pad.shape 208 | h_off = random.randint(0, img_h - self.crop_h) 209 | w_off = random.randint(0, img_w - self.crop_w) 210 | # roi = cv2.Rect(w_off, h_off, self.crop_w, self.crop_h); 211 | image = np.asarray(img_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 212 | label = np.asarray(label_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 213 | #image = image[:, :, ::-1] # change to BGR 214 | image = image.transpose((2, 0, 1)) 215 | if self.is_mirror: 216 | flip = np.random.choice(2) * 2 - 1 217 | image = image[:, :, ::flip] 218 | label = label[:, ::flip] 219 | 220 | return image.copy(), label.copy(), np.array(size), name 221 | 222 | 223 | class CSDataTestSet(data.Dataset): 224 | def __init__(self, root, list_path, crop_size=(505, 505), mean=(128, 128, 128)): 225 | self.root = root 226 | self.list_path = list_path 227 | self.crop_h, self.crop_w = crop_size 228 | self.mean = mean 229 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 230 | self.img_ids = [i_id.strip().split() for i_id in open(list_path)] 231 | self.files = [] 232 | # for split in ["train", "trainval", "val"]: 233 | for item in self.img_ids: 234 | image_path, label_path = item 235 | name = osp.splitext(osp.basename(label_path))[0] 236 | img_file = osp.join(self.root, image_path) 237 | self.files.append({ 238 | "img": img_file 239 | }) 240 | 241 | def __len__(self): 242 | return len(self.files) 243 | 244 | def __getitem__(self, index): 245 | datafiles = self.files[index] 246 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 247 | size = image.shape 248 | name = osp.splitext(osp.basename(datafiles["img"]))[0] 249 | image = np.asarray(image, np.float32) 250 | image -= self.mean 251 | 252 | img_h, img_w, _ = image.shape 253 | pad_h = max(self.crop_h - img_h, 0) 254 | pad_w = max(self.crop_w - img_w, 0) 255 | if pad_h > 0 or pad_w > 0: 256 | image = cv2.copyMakeBorder(image, 0, pad_h, 0, 257 | pad_w, cv2.BORDER_CONSTANT, 258 | value=(0.0, 0.0, 0.0)) 259 | image = image.transpose((2, 0, 1)) 260 | return image, name, size 261 | 262 | class CSDataTestSet(data.Dataset): 263 | 
def __init__(self, root, list_path, crop_size=(505, 505)): 264 | self.root = root 265 | self.list_path = list_path 266 | self.crop_h, self.crop_w = crop_size 267 | # self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 268 | self.img_ids = [i_id.strip().split()[0] for i_id in open(list_path)] 269 | self.files = [] 270 | # for split in ["train", "trainval", "val"]: 271 | for image_path in self.img_ids: 272 | name = osp.splitext(osp.basename(image_path))[0] 273 | img_file = osp.join(self.root, image_path) 274 | self.files.append({ 275 | "img": img_file 276 | }) 277 | 278 | def __len__(self): 279 | return len(self.files) 280 | 281 | def __getitem__(self, index): 282 | datafiles = self.files[index] 283 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 284 | image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR) 285 | size = image.shape 286 | name = osp.splitext(osp.basename(datafiles["img"]))[0] 287 | image = np.asarray(image, np.float32) 288 | image = (image - image.min()) / (image.max() - image.min()) 289 | 290 | img_h, img_w, _ = image.shape 291 | pad_h = max(self.crop_h - img_h, 0) 292 | pad_w = max(self.crop_w - img_w, 0) 293 | if pad_h > 0 or pad_w > 0: 294 | image = cv2.copyMakeBorder(image, 0, pad_h, 0, 295 | pad_w, cv2.BORDER_CONSTANT, 296 | value=(0.0, 0.0, 0.0)) 297 | image = image.transpose((2, 0, 1)) 298 | return image, np.array(size), name 299 | 300 | if __name__ == '__main__': 301 | import matplotlib.pyplot as plt  # only needed for this smoke test 302 | # NOTE: "./data/train.txt" is a placeholder image-list path for illustration, not a file shipped with the repo 303 | dst = VOCDataSet("./data", "./data/train.txt") 304 | trainloader = data.DataLoader(dst, batch_size=4) 305 | for i, batch in enumerate(trainloader):  # renamed from `data` to avoid shadowing the torch.utils.data import 306 | imgs, labels, _, _ = batch  # __getitem__ also returns the image size and name 307 | if i == 0: 308 | img = torchvision.utils.make_grid(imgs).numpy() 309 | img = np.transpose(img, (1, 2, 0)) 310 | img = img[:, :, ::-1]  # BGR -> RGB for display 311 | plt.imshow(img) 312 | plt.show() 313 | --------------------------------------------------------------------------------