├── data
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── yh_dataset.pyc
│   ├── base_dataset.pyc
│   ├── data_loader.pyc
│   ├── image_folder.pyc
│   ├── random_crop_yh.pyc
│   ├── yh_seg_dataset.pyc
│   ├── base_data_loader.pyc
│   ├── unaligned_dataset.pyc
│   ├── yh_test_seg_dataset.pyc
│   ├── custom_dataset_data_loader.pyc
│   ├── data_loader.py
│   ├── base_data_loader.py
│   ├── single_dataset.py
│   ├── unaligned_dataset.py
│   ├── yh_dataset.py
│   ├── random_crop_yh.py
│   ├── custom_dataset_data_loader.py
│   ├── aligned_dataset.py
│   ├── image_folder.py
│   ├── base_dataset.py
│   ├── yh_seg_spleenonly_dataset.py
│   ├── yh_test_seg_dataset.py
│   └── yh_seg_dataset.py
├── models
│   ├── __init__.py
│   ├── FCNGCN.pyc
│   ├── models.pyc
│   ├── __init__.pyc
│   ├── networks.pyc
│   ├── base_model.pyc
│   ├── test_model.pyc
│   ├── cycle_gan_model.pyc
│   ├── cycle_seg_model.pyc
│   ├── test_seg_model.pyc
│   ├── models.py
│   ├── test_model.py
│   ├── base_model.py
│   ├── test_seg_model.py
│   └── FCNGCN.py
├── util
│   ├── __init__.py
│   ├── html.pyc
│   ├── util.pyc
│   ├── __init__.pyc
│   ├── image_pool.pyc
│   ├── visualizer.pyc
│   ├── png.py
│   ├── image_pool.py
│   ├── html.py
│   ├── util.py
│   └── get_data.py
├── options
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── base_options.pyc
│   ├── train_options.pyc
│   ├── .idea
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   └── options.iml
│   ├── test_options.py
│   └── train_options.py
├── torchsrc
│   ├── models
│   │   ├── __init__.py
│   │   └── __init__.pyc
│   ├── utils
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── image_pool.pyc
│   │   ├── image_pool.py
│   │   └── util.py
│   ├── datasets
│   │   ├── apc
│   │   │   ├── data
│   │   │   │   └── mit_training_blacklist.yaml
│   │   │   ├── jsk.pyc
│   │   │   ├── rbo.pyc
│   │   │   ├── v1.pyc
│   │   │   ├── v2.pyc
│   │   │   ├── v3.pyc
│   │   │   ├── base.pyc
│   │   │   ├── __init__.pyc
│   │   │   ├── mit_benchmark.pyc
│   │   │   ├── mit_training.pyc
│   │   │   ├── __init__.py
│   │   │   ├── v1.py
│   │   │   ├── v2.py
│   │   │   ├── v3.py
│   │   │   ├── jsk.py
│   │   │   ├── base.py
│   │   │   ├── rbo.py
│   │   │   ├── mit_training.py
│   │   │   └── mit_benchmark.py
│   │   ├── voc.pyc
│   │   ├── __init__.pyc
│   │   └── __init__.py
│   ├── utils.pyc
│   ├── __init__.pyc
│   ├── trainer.pyc
│   ├── __init__.py
│   ├── ext
│   │   └── fcn.berkeleyvision.org
│   │       ├── voc-fcn16s
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── voc-fcn32s
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── voc-fcn8s
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── nyud-fcn32s-hha
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── siftflow-fcn16s
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── siftflow-fcn32s
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── siftflow-fcn8s
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   └── solve.py
│   │       ├── voc-fcn-alexnet
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── voc-fcn8s-atonce
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── nyud-fcn32s-color
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── pascalcontext-fcn16s
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── pascalcontext-fcn32s
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── pascalcontext-fcn8s
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── nyud-fcn32s-color-hha
│   │       │   ├── caffemodel-url
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── data
│   │       │   ├── pascal
│   │       │   │   ├── classes.txt
│   │       │   │   └── README.md
│   │       │   ├── nyud
│   │       │   │   ├── classes.txt
│   │       │   │   ├── README.md
│   │       │   │   ├── train.txt
│   │       │   │   └── val.txt
│   │       │   ├── sift-flow
│   │       │   │   ├── README.md
│   │       │   │   ├── classes.txt
│   │       │   │   └── test.txt
│   │       │   └── pascal-context
│   │       │       ├── classes-59.txt
│   │       │       └── README.md
│   │       ├── nyud-fcn32s-color-d
│   │       │   ├── solver.prototxt
│   │       │   ├── solve.py
│   │       │   └── net.py
│   │       ├── ilsvrc-nets
│   │       │   └── README.md
│   │       ├── infer.py
│   │       ├── voc_helper.py
│   │       ├── score.py
│   │       ├── surgery.py
│   │       ├── siftflow_layers.py
│   │       └── pascalcontext_layers.py
│   └── utils.py
├── imgs
│   ├── Figure1.jpg
│   ├── Figure2.jpg
│   └── Figure3.jpg
├── sublist.py
├── LICENSE1
├── LICENSE2
└── README.md
/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/options/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/torchsrc/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/torchsrc/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/data/mit_training_blacklist.yaml:
--------------------------------------------------------------------------------
1 | - 59651
2 | - 87744
3 |
--------------------------------------------------------------------------------
/util/html.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/html.pyc
--------------------------------------------------------------------------------
/util/util.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/util.pyc
--------------------------------------------------------------------------------
/data/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/__init__.pyc
--------------------------------------------------------------------------------
/imgs/Figure1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/imgs/Figure1.jpg
--------------------------------------------------------------------------------
/imgs/Figure2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/imgs/Figure2.jpg
--------------------------------------------------------------------------------
/imgs/Figure3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/imgs/Figure3.jpg
--------------------------------------------------------------------------------
/models/FCNGCN.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/FCNGCN.pyc
--------------------------------------------------------------------------------
/models/models.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/models.pyc
--------------------------------------------------------------------------------
/util/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/__init__.pyc
--------------------------------------------------------------------------------
/data/yh_dataset.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/yh_dataset.pyc
--------------------------------------------------------------------------------
/models/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/__init__.pyc
--------------------------------------------------------------------------------
/models/networks.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/networks.pyc
--------------------------------------------------------------------------------
/torchsrc/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/utils.pyc
--------------------------------------------------------------------------------
/util/image_pool.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/image_pool.pyc
--------------------------------------------------------------------------------
/util/visualizer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/visualizer.pyc
--------------------------------------------------------------------------------
/data/base_dataset.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/base_dataset.pyc
--------------------------------------------------------------------------------
/data/data_loader.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/data_loader.pyc
--------------------------------------------------------------------------------
/data/image_folder.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/image_folder.pyc
--------------------------------------------------------------------------------
/models/base_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/base_model.pyc
--------------------------------------------------------------------------------
/models/test_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/test_model.pyc
--------------------------------------------------------------------------------
/options/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/options/__init__.pyc
--------------------------------------------------------------------------------
/torchsrc/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/__init__.pyc
--------------------------------------------------------------------------------
/torchsrc/trainer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/trainer.pyc
--------------------------------------------------------------------------------
/data/random_crop_yh.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/random_crop_yh.pyc
--------------------------------------------------------------------------------
/data/yh_seg_dataset.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/yh_seg_dataset.pyc
--------------------------------------------------------------------------------
/options/base_options.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/options/base_options.pyc
--------------------------------------------------------------------------------
/data/base_data_loader.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/base_data_loader.pyc
--------------------------------------------------------------------------------
/data/unaligned_dataset.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/unaligned_dataset.pyc
--------------------------------------------------------------------------------
/models/cycle_gan_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/cycle_gan_model.pyc
--------------------------------------------------------------------------------
/models/cycle_seg_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/cycle_seg_model.pyc
--------------------------------------------------------------------------------
/models/test_seg_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/test_seg_model.pyc
--------------------------------------------------------------------------------
/options/train_options.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/options/train_options.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/voc.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/voc.pyc
--------------------------------------------------------------------------------
/torchsrc/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/utils/__init__.pyc
--------------------------------------------------------------------------------
/data/yh_test_seg_dataset.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/yh_test_seg_dataset.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/jsk.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/jsk.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/rbo.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/rbo.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/v1.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/v1.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/v2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/v2.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/v3.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/v3.pyc
--------------------------------------------------------------------------------
/torchsrc/models/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/models/__init__.pyc
--------------------------------------------------------------------------------
/torchsrc/utils/image_pool.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/utils/image_pool.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/__init__.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/base.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/base.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/__init__.pyc
--------------------------------------------------------------------------------
/data/custom_dataset_data_loader.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/custom_dataset_data_loader.pyc
--------------------------------------------------------------------------------
/torchsrc/__init__.py:
--------------------------------------------------------------------------------
1 | from . import models # NOQA
2 | from .trainer import Trainer # NOQA
3 | from . import utils # NOQA
4 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/fcn16s-heavy-pascal.caffemodel
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/fcn32s-heavy-pascal.caffemodel
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/mit_benchmark.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/mit_benchmark.pyc
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/mit_training.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/mit_training.pyc
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/nyud-fcn32s-hha-heavy.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/siftflow-fcn16s-heavy.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/siftflow-fcn32s-heavy.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/siftflow-fcn8s-heavy.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/fcn-alexnet-pascal.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/fcn8s-atonce-pascal.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/nyud-fcn32s-color-heavy.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/__init__.py:
--------------------------------------------------------------------------------
1 | from v1 import APC2016V1 # NOQA
2 | from v2 import APC2016V2 # NOQA
3 | from v3 import APC2016V3 # NOQA
4 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/pascalcontext-fcn16s-heavy.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/pascalcontext-fcn32s-heavy.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/pascalcontext-fcn8s-heavy.caffemodel
2 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/caffemodel-url:
--------------------------------------------------------------------------------
1 | http://dl.caffe.berkeleyvision.org/nyud-fcn32s-color-hha-heavy.caffemodel
2 |
--------------------------------------------------------------------------------
/data/data_loader.py:
--------------------------------------------------------------------------------
1 |
2 | def CreateDataLoader(opt):
3 |     from data.custom_dataset_data_loader import CustomDatasetDataLoader
4 |     data_loader = CustomDatasetDataLoader()
5 |     print(data_loader.name())
6 |     data_loader.initialize(opt)
7 |     return data_loader
8 |
--------------------------------------------------------------------------------
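A minimal usage sketch of this factory, assuming the standard CycleGAN-style interface (TrainOptions.parse() from the options package and CustomDatasetDataLoader.load_data()); this snippet is illustrative and not a file in the repository:

    from options.train_options import TrainOptions
    from data.data_loader import CreateDataLoader

    opt = TrainOptions().parse()           # command-line options (parse() assumed from BaseOptions)
    data_loader = CreateDataLoader(opt)    # builds and initializes a CustomDatasetDataLoader
    dataset = data_loader.load_data()      # iterable over training batches
    for i, batch in enumerate(dataset):
        pass                               # each batch is a dict keyed by the active dataset class
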
/torchsrc/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .apc import APC2016V1 # NOQA
2 | from .apc import APC2016V2 # NOQA
3 | from .apc import APC2016V3 # NOQA
4 | from .voc import SBDClassSeg # NOQA
5 | from .voc import VOC2011ClassSeg # NOQA
6 | from .voc import VOC2012ClassSeg # NOQA
7 |
--------------------------------------------------------------------------------
/data/base_data_loader.py:
--------------------------------------------------------------------------------
1 |
2 | class BaseDataLoader():
3 |     def __init__(self):
4 |         pass
5 |
6 |     def initialize(self, opt):
7 |         self.opt = opt
8 |         pass
9 |
10 |     def load_data(self):
11 |         return None
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/options/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/options/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/classes.txt:
--------------------------------------------------------------------------------
1 | background
2 | aeroplane
3 | bicycle
4 | bird
5 | boat
6 | bottle
7 | bus
8 | car
9 | cat
10 | chair
11 | cow
12 | diningtable
13 | dog
14 | horse
15 | motorbike
16 | person
17 | pottedplant
18 | sheep
19 | sofa
20 | train
21 | tvmonitor
22 |
23 | and 255 is the ignore label that marks pixels excluded from learning and
24 | evaluation by the PASCAL VOC ground truth.
25 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "trainval.prototxt"
2 | test_net: "test.prototxt"
3 | test_iter: 200
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-12
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | test_initialization: false
18 |
--------------------------------------------------------------------------------
/options/.idea/options.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "trainval.prototxt"
2 | test_net: "test.prototxt"
3 | test_iter: 200
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-12
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | test_initialization: false
18 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "trainval.prototxt"
2 | test_net: "test.prototxt"
3 | test_iter: 200
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-10
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | test_initialization: false
18 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "train.prototxt"
2 | test_net: "val.prototxt"
3 | test_iter: 736
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-14
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 100000
16 | weight_decay: 0.0005
17 | snapshot: 4000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "train.prototxt"
2 | test_net: "val.prototxt"
3 | test_iter: 736
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for normalized softmax
10 | base_lr: 1e-4
11 | # standard momentum
12 | momentum: 0.9
13 | # gradient accumulation
14 | iter_size: 20
15 | max_iter: 100000
16 | weight_decay: 0.0005
17 | snapshot: 4000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "train.prototxt"
2 | test_net: "val.prototxt"
3 | test_iter: 736
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-12
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 100000
16 | weight_decay: 0.0005
17 | snapshot: 4000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "train.prototxt"
2 | test_net: "val.prototxt"
3 | test_iter: 736
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-10
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 100000
16 | weight_decay: 0.0005
17 | snapshot: 4000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "trainval.prototxt"
2 | test_net: "test.prototxt"
3 | test_iter: 654
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-10
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | snapshot: 2000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "train.prototxt"
2 | test_net: "val.prototxt"
3 | test_iter: 736
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-10
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | snapshot: 4000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "trainval.prototxt"
2 | test_net: "test.prototxt"
3 | test_iter: 654
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-10
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | snapshot: 2000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "trainval.prototxt"
2 | test_net: "test.prototxt"
3 | test_iter: 654
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-10
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | snapshot: 2000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "train.prototxt"
2 | test_net: "val.prototxt"
3 | test_iter: 5105
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-12
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | snapshot: 4000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "train.prototxt"
2 | test_net: "val.prototxt"
3 | test_iter: 5105
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-10
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | snapshot: 4000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "train.prototxt"
2 | test_net: "val.prototxt"
3 | test_iter: 5105
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-14
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | snapshot: 4000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "trainval.prototxt"
2 | test_net: "test.prototxt"
3 | test_iter: 654
4 | # make test net, but don't invoke it from the solver itself
5 | test_interval: 999999999
6 | display: 20
7 | average_loss: 20
8 | lr_policy: "fixed"
9 | # lr for unnormalized softmax
10 | base_lr: 1e-12
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 300000
16 | weight_decay: 0.0005
17 | snapshot: 2000
18 | snapshot_prefix: "snapshot/train"
19 | test_initialization: false
20 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/classes.txt:
--------------------------------------------------------------------------------
1 | wall
2 | floor
3 | cabinet
4 | bed
5 | chair
6 | sofa
7 | table
8 | door
9 | window
10 | bookshelf
11 | picture
12 | counter
13 | blinds
14 | desk
15 | shelves
16 | curtain
17 | dresser
18 | pillow
19 | mirror
20 | floor mat
21 | clothes
22 | ceiling
23 | books
24 | refridgerator
25 | television
26 | paper
27 | towel
28 | shower curtain
29 | box
30 | whiteboard
31 | person
32 | night stand
33 | toilet
34 | sink
35 | lamp
36 | bathtub
37 | bag
38 | otherstructure
39 | otherfurniture
40 | otherprop
41 |
42 | and 0 is void (and converted to 255 by the NYUDSegDataLayer)
43 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/ilsvrc-nets/README.md:
--------------------------------------------------------------------------------
1 | # ILSVRC Networks
2 |
3 | These classification networks are trained on ILSVRC for object recognition.
4 | We cast these nets into fully convolutional form to make use of their parameters as pre-training.
5 |
6 | To reproduce our FCNs, or train your own on your own data, you need to first collect the corresponding base network.
7 |
8 | - [VGG16](https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md)
9 | - [CaffeNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_reference_caffenet)
10 | - [BVLC GoogLeNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet)
11 |
--------------------------------------------------------------------------------
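A hedged sketch of the casting step described above, using the net-surgery helper shipped at the root of this directory (surgery.py); the base-network prototxt and caffemodel paths are assumptions, not files verified in this checkout:

    import caffe
    import surgery  # fcn.berkeleyvision.org/surgery.py

    # load the base classifier (e.g. VGG16) and the FCN to be initialized
    vgg16 = caffe.Net('vgg16/deploy.prototxt', 'vgg16/vgg16.caffemodel', caffe.TEST)
    fcn = caffe.Net('voc-fcn32s/train.prototxt', caffe.TRAIN)

    # copy compatible parameters; fully connected layers become convolutions
    surgery.transplant(fcn, vgg16)
    fcn.save('ilsvrc-nets/vgg16-fcn.caffemodel')  # the weights file the solve.py scripts load
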
/torchsrc/ext/fcn.berkeleyvision.org/infer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 |
4 | import caffe
5 |
6 | # load image, switch to BGR, subtract mean, and make dims C x H x W for Caffe
7 | im = Image.open('pascal/VOC2010/JPEGImages/2007_000129.jpg')
8 | in_ = np.array(im, dtype=np.float32)
9 | in_ = in_[:,:,::-1]
10 | in_ -= np.array((104.00698793,116.66876762,122.67891434))
11 | in_ = in_.transpose((2,0,1))
12 |
13 | # load net
14 | net = caffe.Net('voc-fcn8s/deploy.prototxt', 'voc-fcn8s/fcn8s-heavy-pascal.caffemodel', caffe.TEST)
15 | # shape for input (data blob is N x C x H x W), set data
16 | net.blobs['data'].reshape(1, *in_.shape)
17 | net.blobs['data'].data[...] = in_
18 | # run net and take argmax for prediction
19 | net.forward()
20 | out = net.blobs['score'].data[0].argmax(axis=0)
21 |
--------------------------------------------------------------------------------
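infer.py leaves the prediction in `out`; a small hypothetical follow-up (not part of the repository) that could be appended to the script to write the result to disk:

    # save the H x W array of per-pixel class indices for inspection
    from PIL import Image
    Image.fromarray(out.astype(np.uint8)).save('2007_000129_pred.png')
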
/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/README.md:
--------------------------------------------------------------------------------
1 | # SIFT Flow
2 |
3 | SIFT Flow is a semantic segmentation dataset with two labelings:
4 |
5 | - semantic classes, such as "cat" or "dog"
6 | - geometric classes, consisting of "horizontal, vertical, and sky"
7 |
8 | Refer to `classes.txt` for the listing of classes in model output order.
9 | Refer to `../siftflow_layers.py` for the Python data layer for this dataset.
10 |
11 | Note that the dataset has a number of issues, including unannotated images and missing classes from the test set.
12 | The provided splits exclude the unannotated images.
13 | As noted in the paper, care must be taken for proper evaluation by excluding the missing classes.
14 |
15 | Download the dataset:
16 | http://www.cs.unc.edu/~jtighe/Papers/ECCV10/siftflow/SiftFlowDataset.zip
17 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../voc-fcn32s/voc-fcn32s.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str)
29 |
30 | for _ in range(25):
31 |     solver.step(4000)
32 |     score.seg_tests(solver, False, val, layer='score')
33 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str)
29 |
30 | for _ in range(25):
31 |     solver.step(4000)
32 |     score.seg_tests(solver, False, val, layer='score')
33 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../voc-fcn16s/voc-fcn16s.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str)
29 |
30 | for _ in range(25):
31 |     solver.step(4000)
32 |     score.seg_tests(solver, False, val, layer='score')
33 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | test = np.loadtxt('../data/nyud/test.txt', dtype=str)
29 |
30 | for _ in range(50):
31 |     solver.step(2000)
32 |     score.seg_tests(solver, False, test, layer='score')
33 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | test = np.loadtxt('../data/nyud/test.txt', dtype=str)
29 |
30 | for _ in range(50):
31 |     solver.step(2000)
32 |     score.seg_tests(solver, False, test, layer='score')
33 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../ilsvrc-nets/alexnet-fcn.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str)
29 |
30 | for _ in range(25):
31 |     solver.step(4000)
32 |     score.seg_tests(solver, False, val, layer='score')
33 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str)
29 |
30 | for _ in range(75):
31 |     solver.step(4000)
32 |     score.seg_tests(solver, False, val, layer='score')
33 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | val = np.loadtxt('../data/pascal/VOC2010/ImageSets/Main/val.txt', dtype=str)
29 |
30 | for _ in range(50):
31 |     solver.step(8000)
32 |     score.seg_tests(solver, False, val, layer='score')
33 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../pascalcontext-fcn32s/pascalcontext-fcn32s.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | val = np.loadtxt('../data/pascal/VOC2010/ImageSets/Main/val.txt', dtype=str)
29 |
30 | for _ in range(50):
31 |     solver.step(8000)
32 |     score.seg_tests(solver, False, val, layer='score')
33 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 |     import setproctitle
10 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 |     pass
13 |
14 | weights = '../pascalcontext-fcn16s/pascalcontext-fcn16s.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | val = np.loadtxt('../data/pascal/VOC2010/ImageSets/Main/val.txt', dtype=str)
29 |
30 | for _ in range(50):
31 |     solver.step(8000)
32 |     score.seg_tests(solver, False, val, layer='score')
33 |
--------------------------------------------------------------------------------
/options/test_options.py:
--------------------------------------------------------------------------------
1 | from .base_options import BaseOptions
2 |
3 |
4 | class TestOptions(BaseOptions):
5 |     def initialize(self):
6 |         BaseOptions.initialize(self)
7 |         self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
8 |         self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
9 |         self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
10 |         self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
11 |         self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
12 |         self.parser.add_argument('--how_many', type=int, default=50, help='how many test images to run')
13 |         self.isTrain = False
14 |
--------------------------------------------------------------------------------
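A brief usage sketch (parse() is assumed to come from BaseOptions, as in the standard CycleGAN-style options; not a repository file):

    from options.test_options import TestOptions

    opt = TestOptions().parse()                       # e.g. python test.py --dataroot <path>
    print(opt.phase, opt.which_epoch, opt.how_many)   # 'test', 'latest', 50 by default
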
/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/README.md:
--------------------------------------------------------------------------------
1 | # NYUDv2: NYU Depth Dataset V2
2 |
3 | NYUDv2 has a curated semantic segmentation challenge with RGB-D inputs and full scene labels of objects and surfaces.
4 | While there are many labels, we follow the 40 class task defined by
5 |
6 | > Perceptual Organization and Recognition of Indoor Scenes from RGB-D Images.
7 | Saurabh Gupta, Pablo Arbelaez, and Jitendra Malik.
8 | CVPR 2013
9 |
10 | at http://www.cs.berkeley.edu/~sgupta/pdf/GuptaArbelaezMalikCVPR13.pdf .
11 | To reproduce the results of our paper, you must make use of the data from Gupta et al. at http://people.eecs.berkeley.edu/~sgupta/cvpr13/data.tgz .
12 |
13 | Refer to `classes.txt` for the listing of classes in model output order.
14 | Refer to `../nyud_layers.py` for the Python data layer for this dataset.
15 |
16 | See the dataset site: http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html.
17 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/classes-59.txt:
--------------------------------------------------------------------------------
1 | 0: background
2 | 1: aeroplane
3 | 2: bicycle
4 | 3: bird
5 | 4: boat
6 | 5: bottle
7 | 6: bus
8 | 7: car
9 | 8: cat
10 | 9: chair
11 | 10: cow
12 | 11: diningtable
13 | 12: dog
14 | 13: horse
15 | 14: motorbike
16 | 15: person
17 | 16: pottedplant
18 | 17: sheep
19 | 18: sofa
20 | 19: train
21 | 20: tvmonitor
22 | 21: bag
23 | 22: bed
24 | 23: bench
25 | 24: book
26 | 25: building
27 | 26: cabinet
28 | 27: ceiling
29 | 28: clothes
30 | 29: computer
31 | 30: cup
32 | 31: door
33 | 32: fence
34 | 33: floor
35 | 34: flower
36 | 35: food
37 | 36: grass
38 | 37: ground
39 | 38: keyboard
40 | 39: light
41 | 40: mountain
42 | 41: mouse
43 | 42: curtain
44 | 43: platform
45 | 44: sign
46 | 45: plate
47 | 46: road
48 | 47: rock
49 | 48: shelves
50 | 49: sidewalk
51 | 50: sky
52 | 51: snow
53 | 52: bedcloth
54 | 53: track
55 | 54: tree
56 | 55: truck
57 | 56: wall
58 | 57: water
59 | 58: window
60 | 59: wood
61 |
--------------------------------------------------------------------------------
/data/single_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import torchvision.transforms as transforms
3 | from data.base_dataset import BaseDataset, get_transform
4 | from data.image_folder import make_dataset
5 | from PIL import Image
6 |
7 |
8 | class SingleDataset(BaseDataset):
9 |     def initialize(self, opt):
10 |         self.opt = opt
11 |         self.root = opt.dataroot
12 |         self.dir_A = os.path.join(opt.dataroot)
13 |
14 |         self.A_paths = make_dataset(self.dir_A)
15 |
16 |         self.A_paths = sorted(self.A_paths)
17 |
18 |         self.transform = get_transform(opt)
19 |
20 |     def __getitem__(self, index):
21 |         A_path = self.A_paths[index]
22 |
23 |         A_img = Image.open(A_path).convert('RGB')
24 |
25 |         A_img = self.transform(A_img)
26 |
27 |         return {'A': A_img, 'A_paths': A_path}
28 |
29 |     def __len__(self):
30 |         return len(self.A_paths)
31 |
32 |     def name(self):
33 |         return 'SingleImageDataset'
34 |
--------------------------------------------------------------------------------
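A hypothetical usage sketch wrapping this dataset in a PyTorch loader (the opt object and its fields are assumed to come from the options package; not part of the repository):

    import torch.utils.data
    from options.test_options import TestOptions
    from data.single_dataset import SingleDataset

    opt = TestOptions().parse()         # must supply --dataroot pointing at a folder of images
    dataset = SingleDataset()
    dataset.initialize(opt)
    loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
    for batch in loader:
        image, paths = batch['A'], batch['A_paths']   # transformed tensor and source filename(s)
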
/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/README.md:
--------------------------------------------------------------------------------
1 | # PASCAL-Context
2 |
3 | PASCAL-Context is a full object and scene labeling of PASCAL VOC 2010.
4 | It includes both object (cat, dog, ...) and surface (sky, grass, ...) classes.
5 |
6 | We follow the 59 class task defined by
7 |
8 | > The Role of Context for Object Detection and Semantic Segmentation in the Wild.
9 | Roozbeh Mottaghi, Xianjie Chen, Xiaobai Liu, Nam-Gyu Cho, Seong-Whan Lee, Sanja Fidler, Raquel Urtasun, and Alan Yuille.
10 | CVPR 2014
11 |
12 | which selects the 59 most common classes for learning and evaluation.
13 |
14 | Refer to `classes-59.txt` for the listing of classes in model output order.
15 | Refer to `../pascalcontext_layers.py` for the Python data layer for this dataset.
16 |
17 | Note that care must be taken to map the raw class annotations into the 59 class task, as handled by our data layer.
18 |
19 | See the dataset site: http://www.cs.stanford.edu/~roozbeh/pascal-context/
20 |
--------------------------------------------------------------------------------
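The remapping mentioned above is handled by the data layer; a toy, self-contained illustration of the idea (the label ids below are placeholders, not the real PASCAL-Context ids):

    import numpy as np

    raw_label_image = np.array([[0, 7, 42], [7, 0, 42]], dtype=np.int32)  # toy raw annotation
    labels_59 = [7, 42]                         # toy subset of raw ids kept for the 59-class task

    lookup = np.zeros(raw_label_image.max() + 1, dtype=np.uint8)
    for new_id, raw_id in enumerate(labels_59, start=1):
        lookup[raw_id] = new_id                 # unretained ids collapse to 0 (background)
    remapped = lookup[raw_label_image]          # vectorized per-pixel remap
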
/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/classes.txt:
--------------------------------------------------------------------------------
1 | Semantic and geometric segmentation classes for scenes.
2 |
3 | Semantic: 0 is void and 1–33 are classes.
4 |
5 | 01 awning
6 | 02 balcony
7 | 03 bird
8 | 04 boat
9 | 05 bridge
10 | 06 building
11 | 07 bus
12 | 08 car
13 | 09 cow
14 | 10 crosswalk
15 | 11 desert
16 | 12 door
17 | 13 fence
18 | 14 field
19 | 15 grass
20 | 16 moon
21 | 17 mountain
22 | 18 person
23 | 19 plant
24 | 20 pole
25 | 21 river
26 | 22 road
27 | 23 rock
28 | 24 sand
29 | 25 sea
30 | 26 sidewalk
31 | 27 sign
32 | 28 sky
33 | 29 staircase
34 | 30 streetlight
35 | 31 sun
36 | 32 tree
37 | 33 window
38 |
39 | Geometric: -1 is void and 1–3 are classes.
40 |
41 | 01 sky
42 | 02 horizontal
43 | 03 vertical
44 |
45 | N.B. Three classes (cow, desert, and moon) are absent from the test set, so
46 | they are excluded from evaluation. The highway_bost181 and street_urb506 images
47 | are missing annotations, so they are likewise excluded from evaluation.
48 |
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/v1.py:
--------------------------------------------------------------------------------
1 | from base import APC2016Base
2 | from jsk import APC2016jsk
3 | from rbo import APC2016rbo
4 |
5 |
6 | class APC2016V1(APC2016Base):
7 |
8 | def __init__(self, split='train', transform=False):
9 | self.datasets = [
10 | APC2016jsk(split, transform),
11 | APC2016rbo(split, transform),
12 | ]
13 |
14 | def __len__(self):
15 | return sum(len(d) for d in self.datasets)
16 |
17 | @property
18 | def split(self):
19 | split = self.datasets[0].split
20 | assert all(d.split == split for d in self.datasets)
21 | return split
22 |
23 | @split.setter
24 | def split(self, value):
25 | for d in self.datasets:
26 | d.split = value
27 |
28 | def __getitem__(self, index):
29 | skipped = 0
30 | for dataset in self.datasets:
31 | current_index = index - skipped
32 | if current_index < len(dataset):
33 | return dataset[current_index]
34 | skipped += len(dataset)
35 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 | import setproctitle
10 | setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 | pass
13 |
14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | test = np.loadtxt('../data/sift-flow/test.txt', dtype=str)
29 |
30 | for _ in range(50):
31 | solver.step(2000)
32 | # N.B. metrics on the semantic labels are off b.c. of missing classes;
33 | # score manually from the histogram instead for proper evaluation
34 | score.seg_tests(solver, False, test, layer='score_sem', gt='sem')
35 | score.seg_tests(solver, False, test, layer='score_geo', gt='geo')
36 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 | import setproctitle
10 | setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 | pass
13 |
14 | weights = '../siftflow-fcn32s/siftflow-fcn32s.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | test = np.loadtxt('../data/sift-flow/test.txt', dtype=str)
29 |
30 | for _ in range(50):
31 | solver.step(2000)
32 | # N.B. metrics on the semantic labels are off b.c. of missing classes;
33 | # score manually from the histogram instead for proper evaluation
34 | score.seg_tests(solver, False, test, layer='score_sem', gt='sem')
35 | score.seg_tests(solver, False, test, layer='score_geo', gt='geo')
36 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 | import setproctitle
10 | setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 | pass
13 |
14 | weights = '../siftflow-fcn16s/siftflow-fcn16s.caffemodel'
15 |
16 | # init
17 | caffe.set_device(int(sys.argv[1]))
18 | caffe.set_mode_gpu()
19 |
20 | solver = caffe.SGDSolver('solver.prototxt')
21 | solver.net.copy_from(weights)
22 |
23 | # surgeries
24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
25 | surgery.interp(solver.net, interp_layers)
26 |
27 | # scoring
28 | test = np.loadtxt('../data/sift-flow/test.txt', dtype=str)
29 |
30 | for _ in range(50):
31 | solver.step(2000)
32 | # N.B. metrics on the semantic labels are off b.c. of missing classes;
33 | # score manually from the histogram instead for proper evaluation
34 | score.seg_tests(solver, False, test, layer='score_sem', gt='sem')
35 | score.seg_tests(solver, False, test, layer='score_geo', gt='geo')
36 |
--------------------------------------------------------------------------------
/util/png.py:
--------------------------------------------------------------------------------
1 | import struct
2 | import zlib
3 |
4 | def encode(buf, width, height):
5 | """ buf: must be bytes or a bytearray in py3, a regular string in py2. formatted RGBRGB... """
6 | assert (width * height * 3 == len(buf))
7 | bpp = 3
8 |
9 | def raw_data():
10 | # reverse the vertical line order and add null bytes at the start
11 | row_bytes = width * bpp
12 | for row_start in range((height - 1) * width * bpp, -1, -row_bytes):
13 | yield b'\x00'
14 | yield buf[row_start:row_start + row_bytes]
15 |
16 | def chunk(tag, data):
17 | return [
18 | struct.pack("!I", len(data)),
19 | tag,
20 | data,
21 | struct.pack("!I", 0xFFFFFFFF & zlib.crc32(data, zlib.crc32(tag)))
22 | ]
23 |
24 | SIGNATURE = b'\x89PNG\r\n\x1a\n'
25 | COLOR_TYPE_RGB = 2
26 | COLOR_TYPE_RGBA = 6
27 | bit_depth = 8
28 | return b''.join(
29 | [ SIGNATURE ] +
30 | chunk(b'IHDR', struct.pack("!2I5B", width, height, bit_depth, COLOR_TYPE_RGB, 0, 0, 0)) +
31 | chunk(b'IDAT', zlib.compress(b''.join(raw_data()), 9)) +
32 | chunk(b'IEND', b'')
33 | )
34 |
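A minimal usage sketch for `encode` above; the import assumes the repository root is on `PYTHONPATH`, and `red.png` is just an example output path.

```python
from util.png import encode

# Encode a 2x2 solid-red image and write the resulting PNG bytes to disk.
width, height = 2, 2
rgb = bytes([255, 0, 0]) * (width * height)   # raw RGBRGB... pixel data
with open('red.png', 'wb') as f:
    f.write(encode(rgb, width, height))
```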
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/v2.py:
--------------------------------------------------------------------------------
1 | from base import APC2016Base
2 | from jsk import APC2016jsk
3 | from mit_benchmark import APC2016mit_benchmark
4 | from rbo import APC2016rbo
5 |
6 |
7 | class APC2016V2(APC2016Base):
8 |
9 | def __init__(self, split, transform):
10 | self.datasets = [
11 | APC2016jsk(split, transform),
12 | APC2016rbo(split, transform),
13 | APC2016mit_benchmark(split, transform),
14 | ]
15 |
16 | def __len__(self):
17 | return sum(len(d) for d in self.datasets)
18 |
19 | @property
20 | def split(self):
21 | split = self.datasets[0].split
22 | assert all(d.split == split for d in self.datasets)
23 | return split
24 |
25 | @split.setter
26 | def split(self, value):
27 | for d in self.datasets:
28 | d.split = value
29 |
30 | def __getitem__(self, index):
31 | skipped = 0
32 | for dataset in self.datasets:
33 | current_index = index - skipped
34 | if current_index < len(dataset):
35 | return dataset[current_index]
36 | skipped += len(dataset)
37 |
--------------------------------------------------------------------------------
/torchsrc/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def _fast_hist(label_true, label_pred, n_class):
5 | mask = (label_true >= 0) & (label_true < n_class)
6 | hist = np.bincount(
7 | n_class * label_true[mask].astype(int) +
8 | label_pred[mask], minlength=n_class**2).reshape(n_class, n_class)
9 | return hist
10 |
11 |
12 | def label_accuracy_score(label_trues, label_preds, n_class):
13 | """Returns accuracy score evaluation result.
14 |
15 | - overall accuracy
16 | - mean accuracy
17 | - mean IU
18 | - fwavacc
19 | """
20 | hist = np.zeros((n_class, n_class))
21 | for lt, lp in zip(label_trues, label_preds):
22 | hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
23 | acc = np.diag(hist).sum() / hist.sum()
24 | acc_cls = np.diag(hist) / hist.sum(axis=1)
25 | acc_cls = np.nanmean(acc_cls)
26 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
27 | mean_iu = np.nanmean(iu)
28 | freq = hist.sum(axis=1) / hist.sum()
29 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
30 | return acc, acc_cls, mean_iu, fwavacc
31 |
32 |
33 |
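A small sanity-check sketch for `label_accuracy_score` above (run with the function in scope; the label maps are toy values).

```python
import numpy as np

# Predictions disagree with the ground truth on 2 of 8 pixels overall.
gt   = [np.array([[0, 1], [1, 1]]), np.array([[1, 0], [0, 0]])]
pred = [np.array([[0, 1], [0, 1]]), np.array([[1, 0], [0, 1]])]
acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(gt, pred, n_class=2)
print(acc)   # 0.75 (6 of 8 pixels correct)
```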
--------------------------------------------------------------------------------
/models/models.py:
--------------------------------------------------------------------------------
1 |
2 | def create_model(opt):
3 | model = None
4 | print(opt.model)
5 | if opt.model == 'cycle_gan':
6 | assert(opt.dataset_mode == 'unaligned' or opt.dataset_mode == 'yh')
7 | from .cycle_gan_model import CycleGANModel
8 | model = CycleGANModel()
9 | elif opt.model == 'pix2pix':
10 | assert(opt.dataset_mode == 'aligned')
11 | from .pix2pix_model import Pix2PixModel
12 | model = Pix2PixModel()
13 | elif opt.model == 'cycle_seg':
14 | assert(opt.dataset_mode == 'yh_seg' or opt.dataset_mode == 'yh_seg_spleen')
15 | from .cycle_seg_model import CycleSEGModel
16 | model = CycleSEGModel()
17 | elif opt.model == 'test':
18 | assert(opt.dataset_mode == 'yh_seg')
19 | from .test_model import TestModel
20 | model = TestModel()
21 | elif opt.model == 'test_seg':
22 | assert(opt.dataset_mode == 'yh_test_seg')
23 | from .test_seg_model import TestSegModel
24 | model = TestSegModel()
25 | else:
26 | raise ValueError("Model [%s] not recognized." % opt.model)
27 | model.initialize(opt)
28 | print("model [%s] was created" % (model.name()))
29 | return model
30 |
--------------------------------------------------------------------------------
/util/image_pool.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch
4 | from torch.autograd import Variable
5 | class ImagePool():
6 | def __init__(self, pool_size):
7 | self.pool_size = pool_size
8 | if self.pool_size > 0:
9 | self.num_imgs = 0
10 | self.images = []
11 |
12 | def query(self, images):
13 | if self.pool_size == 0:
14 | return images
15 | return_images = []
16 | for image in images.data:
17 | image = torch.unsqueeze(image, 0)
18 | if self.num_imgs < self.pool_size:
19 | self.num_imgs = self.num_imgs + 1
20 | self.images.append(image)
21 | return_images.append(image)
22 | else:
23 | p = random.uniform(0, 1)
24 | if p > 0.5:
25 | random_id = random.randint(0, self.pool_size-1)
26 | tmp = self.images[random_id].clone()
27 | self.images[random_id] = image
28 | return_images.append(tmp)
29 | else:
30 | return_images.append(image)
31 | return_images = Variable(torch.cat(return_images, 0))
32 | return return_images
33 |
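A minimal sketch of how the pool is typically used when updating the discriminator (run with `ImagePool` above in scope; the tensor sizes are arbitrary).

```python
import torch
from torch.autograd import Variable

pool = ImagePool(pool_size=50)
fake_B = Variable(torch.randn(4, 3, 64, 64))   # stand-in for generator output
fake_for_D = pool.query(fake_B)                # mix of fresh and buffered fakes
print(fake_for_D.size())                       # torch.Size([4, 3, 64, 64])
```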
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 | import setproctitle
10 | setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 | pass
13 |
14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
15 | base_net = caffe.Net('../ilsvrc-nets/vgg16fcn.prototxt', '../vgg16fc.caffemodel',
16 | caffe.TEST)
17 |
18 | # init
19 | caffe.set_device(int(sys.argv[1]))
20 | caffe.set_mode_gpu()
21 |
22 | solver = caffe.SGDSolver('solver.prototxt')
23 | surgery.transplant(solver.net, base_net)
24 |
25 | # surgeries
26 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
27 | surgery.interp(solver.net, interp_layers)
28 |
29 | solver.net.params['conv1_1_bgrd'][0].data[:, :3] = base_net.params['conv1_1'][0].data
30 | solver.net.params['conv1_1_bgrd'][0].data[:, 3] = np.mean(base_net.params['conv1_1'][0].data, axis=1)
31 | solver.net.params['conv1_1_bgrd'][1].data[...] = base_net.params['conv1_1'][1].data
32 |
33 | del base_net
34 |
35 | # scoring
36 | test = np.loadtxt('../data/nyud/test.txt', dtype=str)
37 |
38 | for _ in range(50):
39 | solver.step(2000)
40 | score.seg_tests(solver, False, test, layer='score')
41 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solve.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import surgery, score
3 |
4 | import numpy as np
5 | import os
6 | import sys
7 |
8 | try:
9 | import setproctitle
10 | setproctitle.setproctitle(os.path.basename(os.getcwd()))
11 | except:
12 | pass
13 |
14 | color_proto = '../nyud-rgb-32s/trainval.prototxt'
15 | color_weights = '../nyud-rgb-32s/nyud-rgb-32s-28k.caffemodel'
16 | hha_proto = '../nyud-hha-32s/trainval.prototxt'
17 | hha_weights = '../nyud-hha-32s/nyud-hha-32s-60k.caffemodel'
18 |
19 | # init
20 | caffe.set_device(int(sys.argv[1]))
21 | caffe.set_mode_gpu()
22 |
23 | solver = caffe.SGDSolver('solver.prototxt')
24 |
25 | # surgeries
26 | color_net = caffe.Net(color_proto, color_weights, caffe.TEST)
27 | surgery.transplant(solver.net, color_net, suffix='color')
28 | del color_net
29 |
30 | hha_net = caffe.Net(hha_proto, hha_weights, caffe.TEST)
31 | surgery.transplant(solver.net, hha_net, suffix='hha')
32 | del hha_net
33 |
34 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
35 | surgery.interp(solver.net, interp_layers)
36 |
37 | # scoring
38 | test = np.loadtxt('../data/nyud/test.txt', dtype=str)
39 |
40 | for _ in range(50):
41 | solver.step(2000)
42 | score.seg_tests(solver, False, test, layer='score')
43 |
--------------------------------------------------------------------------------
/torchsrc/utils/image_pool.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch
4 | from pdb import set_trace as st
5 | from torch.autograd import Variable
6 | class ImagePool():
7 | def __init__(self, pool_size):
8 | self.pool_size = pool_size
9 | if self.pool_size > 0:
10 | self.num_imgs = 0
11 | self.images = []
12 |
13 | def query(self, images):
14 | if self.pool_size == 0:
15 | return images
16 | return_images = []
17 | for image in images.data:
18 | image = torch.unsqueeze(image, 0)
19 | if self.num_imgs < self.pool_size:
20 | self.num_imgs = self.num_imgs + 1
21 | self.images.append(image)
22 | return_images.append(image)
23 | else:
24 | p = random.uniform(0, 1)
25 | if p > 0.5:
26 | random_id = random.randint(0, self.pool_size-1)
27 | tmp = self.images[random_id].clone()
28 | self.images[random_id] = image
29 | return_images.append(tmp)
30 | else:
31 | return_images.append(image)
32 | return_images = Variable(torch.cat(return_images, 0))
33 | return return_images
34 |
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/v3.py:
--------------------------------------------------------------------------------
1 | from base import APC2016Base
2 | from jsk import APC2016jsk
3 | from mit_benchmark import APC2016mit_benchmark
4 | from mit_training import APC2016mit_training
5 | from rbo import APC2016rbo
6 |
7 |
8 | class APC2016V3(APC2016Base):
9 |
10 | def __init__(self, split, transform=False):
11 | if split == 'train':
12 | self.datasets = [
13 | APC2016mit_training(transform),
14 | APC2016jsk('all', transform),
15 | APC2016rbo('all', transform),
16 | ]
17 | elif split == 'valid':
18 | self.datasets = [
19 | APC2016mit_benchmark('all', transform),
20 | ]
21 | else:
22 | raise ValueError('Unsupported split: %s' % split)
23 |
24 | def __len__(self):
25 | return sum(len(d) for d in self.datasets)
26 |
27 | @property
28 | def split(self):
29 | raise RuntimeError('Not supported.')
30 |
31 | @split.setter
32 | def split(self, value):
33 | raise RuntimeError('Not supported.')
34 |
35 | def __getitem__(self, index):
36 | skipped = 0
37 | for dataset in self.datasets:
38 | current_index = index - skipped
39 | if current_index < len(dataset):
40 | return dataset[current_index]
41 | skipped += len(dataset)
42 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/README.md:
--------------------------------------------------------------------------------
1 | # PASCAL VOC and SBD
2 |
3 | PASCAL VOC is a standard recognition dataset and benchmark with detection and semantic segmentation challenges.
4 | The semantic segmentation challenge annotates 20 object classes and background.
5 | The Semantic Boundary Dataset (SBD) is a further annotation of the PASCAL VOC data that provides more semantic segmentation and instance segmentation masks.
6 |
7 | PASCAL VOC has a private test set and [leaderboard for semantic segmentation](http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6).
8 |
9 | The train/val/test splits of PASCAL VOC segmentation challenge and SBD diverge.
10 | Most notably VOC 2011 segval intersects with SBD train.
11 | Care must be taken for proper evaluation by excluding images from the train or val splits.
12 |
13 | We train on the 8,498 images of SBD train.
14 | We validate on the non-intersecting set defined in the included `seg11valid.txt`.
15 |
16 | Refer to `classes.txt` for the listing of classes in model output order.
17 | Refer to `../voc_layers.py` for the Python data layer for this dataset.
18 |
19 | See the dataset sites for download:
20 |
21 | - PASCAL VOC 2012: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/
22 | - SBD: see [homepage](http://home.bharathh.info/home/sbd) or [direct download](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz)
23 |
--------------------------------------------------------------------------------
/sublist.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import h5py
4 | import random
5 | import linecache
6 |
7 |
8 | def mkdir(path):
9 | if not os.path.exists(path):
10 | os.makedirs(path)
11 |
12 |
13 | def dir2list(path,sub_list_file):
14 | if os.path.exists(sub_list_file):
15 | fp = open(sub_list_file, 'r')
16 | sublines = fp.readlines()
17 | sub_names = []
18 | for subline in sublines:
19 | sub_info = subline.replace('\n', '')
20 | sub_names.append(sub_info)
21 | fp.close()
22 | return sub_names
23 | else:
24 | fp = open(sub_list_file, 'w')
25 | img_root_dir = os.path.join(path)
26 | subs = os.listdir(img_root_dir)
27 | subs.sort()
28 | for sub in subs:
29 | sub_dir = os.path.join(img_root_dir,sub)
30 | views = os.listdir(sub_dir)
31 | views.sort()
32 | for view in views:
33 | view_dir = os.path.join(sub_dir,view)
34 | slices = os.listdir(view_dir)
35 | slices.sort()
36 | for slice in slices:
37 | line = os.path.join(view_dir,slice)
38 | fp.write(line + "\n")
39 | fp.close()
40 |
41 |
42 | def equal_length_two_list(list_A, list_B):
43 | if len(list_A) 0:
48 | img = ImageOps.expand(img, border=self.padding, fill=0)
49 |
50 | w, h = img.size
51 | th, tw = self.size
52 | if w == tw and h == th:
53 | return img
54 | if i==0:
55 | x1 = random.randint(0, w - tw)
56 | y1 = random.randint(0, h - th)
57 | output.append(img.crop((x1, y1, x1 + tw, y1 + th)))
58 | return output
--------------------------------------------------------------------------------
/data/custom_dataset_data_loader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data
2 | from data.base_data_loader import BaseDataLoader
3 |
4 |
5 | def CreateDataset(opt):
6 | dataset = None
7 | if opt.dataset_mode == 'aligned':
8 | from data.aligned_dataset import AlignedDataset
9 | dataset = AlignedDataset()
10 | elif opt.dataset_mode == 'unaligned':
11 | from data.unaligned_dataset import UnalignedDataset
12 | dataset = UnalignedDataset()
13 | elif opt.dataset_mode == 'single':
14 | from data.single_dataset import SingleDataset
15 | dataset = SingleDataset()
16 | elif opt.dataset_mode == 'yh':
17 | from data.yh_dataset import yhDataset
18 | dataset = yhDataset()
19 | elif opt.dataset_mode == 'yh_seg':
20 | from data.yh_seg_dataset import yhSegDataset
21 | dataset = yhSegDataset()
22 | elif opt.dataset_mode == 'yh_seg_spleen':
23 | from data.yh_seg_spleenonly_dataset import yhSegDatasetSpleenOnly
24 | dataset = yhSegDatasetSpleenOnly()
25 | elif opt.dataset_mode == 'yh_test_seg':
26 | from data.yh_test_seg_dataset import yhTestSegDataset
27 | dataset = yhTestSegDataset()
28 | else:
29 | raise ValueError("Dataset [%s] not recognized." % opt.dataset_mode)
30 |
31 | print("dataset [%s] was created" % (dataset.name()))
32 | dataset.initialize(opt)
33 | return dataset
34 |
35 |
36 | class CustomDatasetDataLoader(BaseDataLoader):
37 | def name(self):
38 | return 'CustomDatasetDataLoader'
39 |
40 | def initialize(self, opt):
41 | BaseDataLoader.initialize(self, opt)
42 | self.dataset = CreateDataset(opt)
43 | self.dataloader = torch.utils.data.DataLoader(
44 | self.dataset,
45 | batch_size=opt.batchSize,
46 | shuffle=not opt.serial_batches,
47 | num_workers=int(opt.nThreads))
48 |
49 | def load_data(self):
50 | return self.dataloader
51 |
52 | def __len__(self):
53 | return min(len(self.dataset), self.opt.max_dataset_size)
54 |
--------------------------------------------------------------------------------
/data/aligned_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 | import torchvision.transforms as transforms
4 | import torch
5 | from data.base_dataset import BaseDataset
6 | from data.image_folder import make_dataset
7 | from PIL import Image
8 |
9 |
10 | class AlignedDataset(BaseDataset):
11 | def initialize(self, opt):
12 | self.opt = opt
13 | self.root = opt.dataroot
14 | self.dir_AB = os.path.join(opt.dataroot, opt.phase)
15 |
16 | self.AB_paths = sorted(make_dataset(self.dir_AB))
17 |
18 | assert(opt.resize_or_crop == 'resize_and_crop')
19 |
20 | transform_list = [transforms.ToTensor(),
21 | transforms.Normalize((0.5, 0.5, 0.5),
22 | (0.5, 0.5, 0.5))]
23 |
24 | self.transform = transforms.Compose(transform_list)
25 |
26 | def __getitem__(self, index):
27 | AB_path = self.AB_paths[index]
28 | AB = Image.open(AB_path).convert('RGB')
29 | AB = AB.resize((self.opt.loadSize * 2, self.opt.loadSize), Image.BICUBIC)
30 | AB = self.transform(AB)
31 |
32 | w_total = AB.size(2)
33 | w = int(w_total / 2)
34 | h = AB.size(1)
35 | w_offset = random.randint(0, max(0, w - self.opt.fineSize - 1))
36 | h_offset = random.randint(0, max(0, h - self.opt.fineSize - 1))
37 |
38 | A = AB[:, h_offset:h_offset + self.opt.fineSize,
39 | w_offset:w_offset + self.opt.fineSize]
40 | B = AB[:, h_offset:h_offset + self.opt.fineSize,
41 | w + w_offset:w + w_offset + self.opt.fineSize]
42 |
43 | if (not self.opt.no_flip) and random.random() < 0.5:
44 | idx = [i for i in range(A.size(2) - 1, -1, -1)]
45 | idx = torch.LongTensor(idx)
46 | A = A.index_select(2, idx)
47 | B = B.index_select(2, idx)
48 |
49 | return {'A': A, 'B': B,
50 | 'A_paths': AB_path, 'B_paths': AB_path}
51 |
52 | def __len__(self):
53 | return len(self.AB_paths)
54 |
55 | def name(self):
56 | return 'AlignedDataset'
57 |
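For reference, a toy sketch of the side-by-side layout this dataset assumes: each file stores A on the left half and B on the right half of one image, and the loaded tensor is split along the width axis (the random crop and flip are omitted here).

```python
import torch

AB = torch.arange(3 * 2 * 4).float().view(3, 2, 4)   # (C, H, 2*W) with W = 2
w = AB.size(2) // 2
A, B = AB[:, :, :w], AB[:, :, w:]    # left half -> domain A, right half -> domain B
print(A.size(), B.size())            # torch.Size([3, 2, 2]) torch.Size([3, 2, 2])
```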
--------------------------------------------------------------------------------
/models/test_seg_model.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Variable
2 | from collections import OrderedDict
3 | import util.util as util
4 | from .base_model import BaseModel
5 | from . import networks
6 | import torch
7 |
8 | class TestSegModel(BaseModel):
9 | def name(self):
10 | return 'TestSegModel'
11 |
12 | def initialize(self, opt):
13 | assert(not opt.isTrain)
14 | BaseModel.initialize(self, opt)
15 | self.input_A = self.Tensor(opt.batchSize, opt.input_nc, opt.fineSize, opt.fineSize)
16 |
17 | self.netG = networks.define_G(opt.input_nc, opt.output_nc,
18 | opt.ngf, opt.which_model_netG,
19 | opt.norm, not opt.no_dropout,
20 | self.gpu_ids)
21 |
22 | self.netG_seg = networks.define_G(opt.input_nc_seg, opt.output_nc_seg,
23 | opt.ngf, opt.which_model_netSeg, opt.norm, not opt.no_dropout, self.gpu_ids)
24 |
25 |
26 |
27 | which_epoch = opt.which_epoch
28 | self.load_network(self.netG, 'G_A', which_epoch)
29 | self.load_network(self.netG_seg, 'Seg_A', which_epoch)
30 |
31 | print('---------- Networks initialized -------------')
32 | # networks.print_network(self.netG)
33 | print('-----------------------------------------------')
34 |
35 | def set_input(self, input):
36 | # we need to use single_dataset mode
37 | input_A = input['A']
38 | self.input_A.resize_(input_A.size()).copy_(input_A)
39 | self.image_paths = input['A_paths']
40 |
41 | def test(self):
42 | self.real_A = Variable(self.input_A)
43 | self.fake_B = self.netG_seg.forward(self.real_A)
44 |
45 | # get image paths
46 | def get_image_paths(self):
47 | return self.image_paths
48 |
49 | def get_current_visuals(self):
50 | real_A = util.tensor2im(self.real_A.data)
51 | fake_B = util.tensor2seg(torch.max(self.fake_B.data,dim=1,keepdim=True)[1])
52 | return OrderedDict([('real_A', real_A), ('fake_B', fake_B)])
53 |
--------------------------------------------------------------------------------
/util/html.py:
--------------------------------------------------------------------------------
1 | import dominate
2 | from dominate.tags import *
3 | import os
4 |
5 |
6 | class HTML:
7 | def __init__(self, web_dir, title, reflesh=0):
8 | self.title = title
9 | self.web_dir = web_dir
10 | self.img_dir = os.path.join(self.web_dir, 'images')
11 | if not os.path.exists(self.web_dir):
12 | os.makedirs(self.web_dir)
13 | if not os.path.exists(self.img_dir):
14 | os.makedirs(self.img_dir)
15 | # print(self.img_dir)
16 |
17 | self.doc = dominate.document(title=title)
18 | if reflesh > 0:
19 | with self.doc.head:
20 | meta(http_equiv="refresh", content=str(reflesh))
21 |
22 | def get_image_dir(self):
23 | return self.img_dir
24 |
25 | def add_header(self, str):
26 | with self.doc:
27 | h3(str)
28 |
29 | def add_table(self, border=1):
30 | self.t = table(border=border, style="table-layout: fixed;")
31 | self.doc.add(self.t)
32 |
33 | def add_images(self, ims, txts, links, width=400):
34 | self.add_table()
35 | with self.t:
36 | with tr():
37 | for im, txt, link in zip(ims, txts, links):
38 | with td(style="word-wrap: break-word;", halign="center", valign="top"):
39 | with p():
40 | with a(href=os.path.join('images', link)):
41 | img(style="width:%dpx" % width, src=os.path.join('images', im))
42 | br()
43 | p(txt)
44 |
45 | def save(self):
46 | html_file = '%s/index.html' % self.web_dir
47 | f = open(html_file, 'wt')
48 | f.write(self.doc.render())
49 | f.close()
50 |
51 |
52 | if __name__ == '__main__':
53 | html = HTML('web/', 'test_html')
54 | html.add_header('hello world')
55 |
56 | ims = []
57 | txts = []
58 | links = []
59 | for n in range(4):
60 | ims.append('image_%d.png' % n)
61 | txts.append('text_%d' % n)
62 | links.append('image_%d.png' % n)
63 | html.add_images(ims, txts, links)
64 | html.save()
65 |
--------------------------------------------------------------------------------
/data/image_folder.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Code from
3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
4 | # Modified the original code so that it also loads images from the current
5 | # directory as well as the subdirectories
6 | ###############################################################################
7 |
8 | import torch.utils.data as data
9 |
10 | from PIL import Image
11 | import os
12 | import os.path
13 |
14 | IMG_EXTENSIONS = [
15 | '.jpg', '.JPG', '.jpeg', '.JPEG',
16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
17 | ]
18 |
19 |
20 | def is_image_file(filename):
21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
22 |
23 |
24 | def make_dataset(dir):
25 | images = []
26 | assert os.path.isdir(dir), '%s is not a valid directory' % dir
27 |
28 | for root, _, fnames in sorted(os.walk(dir)):
29 | for fname in fnames:
30 | if is_image_file(fname):
31 | path = os.path.join(root, fname)
32 | images.append(path)
33 |
34 | return images
35 |
36 |
37 | def default_loader(path):
38 | return Image.open(path).convert('RGB')
39 |
40 |
41 | class ImageFolder(data.Dataset):
42 |
43 | def __init__(self, root, transform=None, return_paths=False,
44 | loader=default_loader):
45 | imgs = make_dataset(root)
46 | if len(imgs) == 0:
47 | raise(RuntimeError("Found 0 images in: " + root + "\n"
48 | "Supported image extensions are: " +
49 | ",".join(IMG_EXTENSIONS)))
50 |
51 | self.root = root
52 | self.imgs = imgs
53 | self.transform = transform
54 | self.return_paths = return_paths
55 | self.loader = loader
56 |
57 | def __getitem__(self, index):
58 | path = self.imgs[index]
59 | img = self.loader(path)
60 | if self.transform is not None:
61 | img = self.transform(img)
62 | if self.return_paths:
63 | return img, path
64 | else:
65 | return img
66 |
67 | def __len__(self):
68 | return len(self.imgs)
69 |
--------------------------------------------------------------------------------
/data/base_dataset.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 | from PIL import Image
3 | import torchvision.transforms as transforms
4 |
5 | class BaseDataset(data.Dataset):
6 | def __init__(self):
7 | super(BaseDataset, self).__init__()
8 |
9 | def name(self):
10 | return 'BaseDataset'
11 |
12 | def initialize(self, opt):
13 | pass
14 |
15 |
16 |
17 |
18 | def get_transform(opt):
19 | transform_list = []
20 | if opt.resize_or_crop == 'resize_and_crop':
21 | osize = [opt.loadSize, opt.loadSize]
22 | transform_list.append(transforms.Scale(osize, Image.BICUBIC))
23 | transform_list.append(transforms.RandomCrop(opt.fineSize))
24 | elif opt.resize_or_crop == 'crop':
25 | transform_list.append(transforms.RandomCrop(opt.fineSize))
26 | elif opt.resize_or_crop == 'scale_width':
27 | transform_list.append(transforms.Lambda(
28 | lambda img: __scale_width(img, opt.fineSize)))
29 | elif opt.resize_or_crop == 'scale_width_and_crop':
30 | transform_list.append(transforms.Lambda(
31 | lambda img: __scale_width(img, opt.loadSize)))
32 | transform_list.append(transforms.RandomCrop(opt.fineSize))
33 | elif opt.resize_or_crop == 'yh_test_resize':
34 | osize = [opt.fineSize, opt.fineSize]
35 | transform_list.append(transforms.Scale(osize, Image.BICUBIC))
36 | # elif opt.resize_or_crop == 'resize':
37 | # osize = [opt.loadSize, opt.loadSize]
38 | # transform_list.append(transforms.Scale(osize, Image.BICUBIC))
39 | # elif opt.resize_or_crop == 'random_crop':
40 | # transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize))
41 |
42 | if opt.isTrain and not opt.no_flip:
43 | transform_list.append(transforms.RandomHorizontalFlip())
44 |
45 | transform_list += [transforms.ToTensor(),
46 | transforms.Normalize((0.5, 0.5, 0.5),
47 | (0.5, 0.5, 0.5))]
48 | return transforms.Compose(transform_list)
49 |
50 | def __scale_width(img, target_width):
51 | ow, oh = img.size
52 | if (ow == target_width):
53 | return img
54 | w = target_width
55 | h = int(target_width * oh / ow)
56 | return img.resize((w, h), Image.BICUBIC)
57 |
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/base.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import torch
4 | from torch.utils import data
5 |
6 |
7 | class APC2016Base(data.Dataset):
8 |
9 | class_names = np.array([
10 | 'background',
11 | 'barkely_hide_bones',
12 | 'cherokee_easy_tee_shirt',
13 | 'clorox_utility_brush',
14 | 'cloud_b_plush_bear',
15 | 'command_hooks',
16 | 'cool_shot_glue_sticks',
17 | 'crayola_24_ct',
18 | 'creativity_chenille_stems',
19 | 'dasani_water_bottle',
20 | 'dove_beauty_bar',
21 | 'dr_browns_bottle_brush',
22 | 'easter_turtle_sippy_cup',
23 | 'elmers_washable_no_run_school_glue',
24 | 'expo_dry_erase_board_eraser',
25 | 'fiskars_scissors_red',
26 | 'fitness_gear_3lb_dumbbell',
27 | 'folgers_classic_roast_coffee',
28 | 'hanes_tube_socks',
29 | 'i_am_a_bunny_book',
30 | 'jane_eyre_dvd',
31 | 'kleenex_paper_towels',
32 | 'kleenex_tissue_box',
33 | 'kyjen_squeakin_eggs_plush_puppies',
34 | 'laugh_out_loud_joke_book',
35 | 'oral_b_toothbrush_green',
36 | 'oral_b_toothbrush_red',
37 | 'peva_shower_curtain_liner',
38 | 'platinum_pets_dog_bowl',
39 | 'rawlings_baseball',
40 | 'rolodex_jumbo_pencil_cup',
41 | 'safety_first_outlet_plugs',
42 | 'scotch_bubble_mailer',
43 | 'scotch_duct_tape',
44 | 'soft_white_lightbulb',
45 | 'staples_index_cards',
46 | 'ticonderoga_12_pencils',
47 | 'up_glucose_bottle',
48 | 'womens_knit_gloves',
49 | 'woods_extension_cord',
50 | ])
51 | mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])
52 |
53 | def transform(self, img, lbl):
54 | img = img[:, :, ::-1] # RGB -> BGR
55 | img = img.astype(np.float64)
56 | img -= self.mean_bgr
57 | img = img.transpose(2, 0, 1)
58 | img = torch.from_numpy(img).float()
59 | lbl = torch.from_numpy(lbl).long()
60 | return img, lbl
61 |
62 | def untransform(self, img, lbl):
63 | img = img.numpy()
64 | img = img.transpose(1, 2, 0)
65 | img += self.mean_bgr
66 | img = img.astype(np.uint8)
67 | img = img[:, :, ::-1]
68 | lbl = lbl.numpy()
69 | return img, lbl
70 |
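A small round-trip sketch for the `transform`/`untransform` pair above (instantiating the base class directly just to reach the helpers; shapes and values are dummies).

```python
import numpy as np

ds = APC2016Base()
img = np.full((4, 4, 3), 128, np.uint8)    # dummy RGB image
lbl = np.zeros((4, 4), np.int32)           # dummy label map
t_img, t_lbl = ds.transform(img, lbl)      # BGR, mean-subtracted, CHW float tensor
back_img, back_lbl = ds.untransform(t_img, t_lbl)
assert back_img.shape == (4, 4, 3) and back_img.dtype == np.uint8
```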
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/rbo.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import os.path as osp
4 | import re
5 |
6 | import numpy as np
7 | import scipy.misc
8 | from sklearn.model_selection import train_test_split
9 |
10 | from base import APC2016Base
11 |
12 |
13 | class APC2016rbo(APC2016Base):
14 |
15 | def __init__(self, split='train', transform=False):
16 | assert split in ['train', 'valid', 'all']
17 | self.split = split
18 | self._transform = transform
19 | self.dataset_dir = osp.expanduser('~/data/datasets/APC2016/APC2016rbo')
20 | data_ids = self._get_ids()
21 | ids_train, ids_valid = train_test_split(
22 | data_ids, test_size=0.25, random_state=1234)
23 | self._ids = {'train': ids_train, 'valid': ids_valid, 'all': data_ids}
24 |
25 | def __len__(self):
26 | return len(self._ids[self.split])
27 |
28 | def _get_ids(self):
29 | ids = []
30 | for img_file in os.listdir(self.dataset_dir):
31 | if not re.match(r'^.*_[0-9]*_bin_[a-l].jpg$', img_file):
32 | continue
33 | data_id = osp.splitext(img_file)[0]
34 | ids.append(data_id)
35 | return ids
36 |
37 | def _load_from_id(self, data_id):
38 | img_file = osp.join(self.dataset_dir, data_id + '.jpg')
39 | img = scipy.misc.imread(img_file)
40 | # generate label from mask files
41 | lbl = np.zeros(img.shape[:2], dtype=np.int32)
42 | # shelf bin mask file
43 | shelf_bin_mask_file = osp.join(self.dataset_dir, data_id + '.pbm')
44 | shelf_bin_mask = scipy.misc.imread(shelf_bin_mask_file, mode='L')
45 | lbl[shelf_bin_mask < 127] = -1
46 | # object mask files
47 | mask_glob = osp.join(self.dataset_dir, data_id + '_*.pbm')
48 | for mask_file in glob.glob(mask_glob):
49 | mask_id = osp.splitext(osp.basename(mask_file))[0]
50 | mask = scipy.misc.imread(mask_file, mode='L')
51 | lbl_name = mask_id[len(data_id + '_'):]
52 | lbl_id = np.where(self.class_names == lbl_name)[0]
53 | lbl[mask > 127] = lbl_id
54 | return img, lbl
55 |
56 | def __getitem__(self, index):
57 | data_id = self._ids[self.split][index]
58 | img, lbl = self._load_from_id(data_id)
59 | if self._transform:
60 | return self.transform(img, lbl)
61 | else:
62 | return img, lbl
63 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/score.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import caffe
3 | import numpy as np
4 | import os
5 | import sys
6 | from datetime import datetime
7 | from PIL import Image
8 |
9 | def fast_hist(a, b, n):
10 | k = (a >= 0) & (a < n)
11 | return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n)
12 |
13 | def compute_hist(net, save_dir, dataset, layer='score', gt='label'):
14 | n_cl = net.blobs[layer].channels
15 | if save_dir:
16 | os.mkdir(save_dir)
17 | hist = np.zeros((n_cl, n_cl))
18 | loss = 0
19 | for idx in dataset:
20 | net.forward()
21 | hist += fast_hist(net.blobs[gt].data[0, 0].flatten(),
22 | net.blobs[layer].data[0].argmax(0).flatten(),
23 | n_cl)
24 |
25 | if save_dir:
26 | im = Image.fromarray(net.blobs[layer].data[0].argmax(0).astype(np.uint8), mode='P')
27 | im.save(os.path.join(save_dir, idx + '.png'))
28 | # compute the loss as well
29 | loss += net.blobs['loss'].data.flat[0]
30 | return hist, loss / len(dataset)
31 |
32 | def seg_tests(solver, save_format, dataset, layer='score', gt='label'):
33 | print '>>>', datetime.now(), 'Begin seg tests'
34 | solver.test_nets[0].share_with(solver.net)
35 | do_seg_tests(solver.test_nets[0], solver.iter, save_format, dataset, layer, gt)
36 |
37 | def do_seg_tests(net, iter, save_format, dataset, layer='score', gt='label'):
38 | n_cl = net.blobs[layer].channels
39 | if save_format:
40 | save_format = save_format.format(iter)
41 | hist, loss = compute_hist(net, save_format, dataset, layer, gt)
42 | # mean loss
43 | print '>>>', datetime.now(), 'Iteration', iter, 'loss', loss
44 | # overall accuracy
45 | acc = np.diag(hist).sum() / hist.sum()
46 | print '>>>', datetime.now(), 'Iteration', iter, 'overall accuracy', acc
47 | # per-class accuracy
48 | acc = np.diag(hist) / hist.sum(1)
49 | print '>>>', datetime.now(), 'Iteration', iter, 'mean accuracy', np.nanmean(acc)
50 | # per-class IU
51 | iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
52 | print '>>>', datetime.now(), 'Iteration', iter, 'mean IU', np.nanmean(iu)
53 | freq = hist.sum(1) / hist.sum()
54 | print '>>>', datetime.now(), 'Iteration', iter, 'fwavacc', \
55 | (freq[freq > 0] * iu[freq > 0]).sum()
56 | return hist
57 |
--------------------------------------------------------------------------------
/torchsrc/utils/util.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import torch
3 | import numpy as np
4 | from PIL import Image
5 | import inspect, re
6 | import numpy as np
7 | import os
8 | import collections
9 |
10 | # Converts a Tensor into a Numpy array
11 | # |imtype|: the desired type of the converted numpy array
12 | def tensor2im(image_tensor, imtype=np.uint8):
13 | image_numpy = image_tensor[0].cpu().float().numpy()
14 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
15 | return image_numpy.astype(imtype)
16 |
17 |
18 | def diagnose_network(net, name='network'):
19 | mean = 0.0
20 | count = 0
21 | for param in net.parameters():
22 | if param.grad is not None:
23 | mean += torch.mean(torch.abs(param.grad.data))
24 | count += 1
25 | if count > 0:
26 | mean = mean / count
27 | print(name)
28 | print(mean)
29 |
30 |
31 | def save_image(image_numpy, image_path):
32 | image_pil = Image.fromarray(image_numpy)
33 | image_pil.save(image_path)
34 |
35 | def info(object, spacing=10, collapse=1):
36 | """Print methods and doc strings.
37 | Takes module, class, list, dictionary, or string."""
38 | methodList = [e for e in dir(object) if isinstance(getattr(object, e), collections.Callable)]
39 | processFunc = collapse and (lambda s: " ".join(s.split())) or (lambda s: s)
40 | print( "\n".join(["%s %s" %
41 | (method.ljust(spacing),
42 | processFunc(str(getattr(object, method).__doc__)))
43 | for method in methodList]) )
44 |
45 | def varname(p):
46 | for line in inspect.getframeinfo(inspect.currentframe().f_back)[3]:
47 | m = re.search(r'\bvarname\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)', line)
48 | if m:
49 | return m.group(1)
50 |
51 | def print_numpy(x, val=True, shp=False):
52 | x = x.astype(np.float64)
53 | if shp:
54 | print('shape,', x.shape)
55 | if val:
56 | x = x.flatten()
57 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
58 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
59 |
60 |
61 | def mkdirs(paths):
62 | if isinstance(paths, list) and not isinstance(paths, str):
63 | for path in paths:
64 | mkdir(path)
65 | else:
66 | mkdir(paths)
67 |
68 |
69 | def mkdir(path):
70 | if not os.path.exists(path):
71 | os.makedirs(path)
72 |
--------------------------------------------------------------------------------
/options/train_options.py:
--------------------------------------------------------------------------------
1 | from .base_options import BaseOptions
2 |
3 |
4 | class TrainOptions(BaseOptions):
5 | def initialize(self):
6 | BaseOptions.initialize(self)
7 | self.parser.add_argument('--display_freq', type=int, default=100, help='frequency of showing training results on screen')
8 | self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console')
9 | self.parser.add_argument('--save_latest_freq', type=int, default=5000, help='frequency of saving the latest results')
10 | self.parser.add_argument('--save_epoch_freq', type=int, default=5, help='frequency of saving checkpoints at the end of epochs')
11 | self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model')
12 | self.parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by <epoch_count>, <epoch_count>+<save_latest_freq>, ...')
13 | self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc')
14 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
15 | self.parser.add_argument('--niter', type=int, default=100, help='# of iter at starting learning rate')
16 | self.parser.add_argument('--niter_decay', type=int, default=100, help='# of iter to linearly decay learning rate to zero')
17 | self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam')
18 | self.parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam')
19 | self.parser.add_argument('--seg_lr', type=float, default=0.0001, help='initial learning rate for adam for segmentation')
20 | self.parser.add_argument('--no_lsgan', action='store_true', help='do *not* use least square GAN, if false, use vanilla GAN')
21 | self.parser.add_argument('--lambda_A', type=float, default=10.0, help='weight for cycle loss (A -> B -> A)')
22 | self.parser.add_argument('--lambda_B', type=float, default=10.0, help='weight for cycle loss (B -> A -> B)')
23 | self.parser.add_argument('--pool_size', type=int, default=50, help='the size of image buffer that stores previously generated images')
24 | self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/')
25 | self.isTrain = True
26 |
--------------------------------------------------------------------------------
/util/util.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import torch
3 | import numpy as np
4 | from PIL import Image
5 | import inspect, re
6 | import numpy as np
7 | import os
8 | import collections
9 |
10 | # Converts a Tensor into a Numpy array
11 | # |imtype|: the desired type of the converted numpy array
12 | def tensor2im(image_tensor, imtype=np.uint8):
13 | image_numpy = image_tensor[0].cpu().float().numpy()
14 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
15 | return image_numpy.astype(imtype)
16 |
17 | def tensor2seg(image_tensor, imtype=np.uint8):
18 | image_numpy = image_tensor[0].cpu().float().numpy()
19 | image_numpy = np.transpose(image_numpy, (1, 2, 0)) *20
20 | return image_numpy.astype(imtype)
21 |
22 | def diagnose_network(net, name='network'):
23 | mean = 0.0
24 | count = 0
25 | for param in net.parameters():
26 | if param.grad is not None:
27 | mean += torch.mean(torch.abs(param.grad.data))
28 | count += 1
29 | if count > 0:
30 | mean = mean / count
31 | print(name)
32 | print(mean)
33 |
34 |
35 | def save_image(image_numpy, image_path):
36 | if (len(image_numpy.shape)>2):
37 | image_pil = Image.fromarray(image_numpy[:,:,0])
38 | else:
39 | image_pil = Image.fromarray(image_numpy)
40 | image_pil.save(image_path)
41 |
42 | def info(object, spacing=10, collapse=1):
43 | """Print methods and doc strings.
44 | Takes module, class, list, dictionary, or string."""
45 | methodList = [e for e in dir(object) if isinstance(getattr(object, e), collections.Callable)]
46 | processFunc = collapse and (lambda s: " ".join(s.split())) or (lambda s: s)
47 | print( "\n".join(["%s %s" %
48 | (method.ljust(spacing),
49 | processFunc(str(getattr(object, method).__doc__)))
50 | for method in methodList]) )
51 |
52 | def varname(p):
53 | for line in inspect.getframeinfo(inspect.currentframe().f_back)[3]:
54 | m = re.search(r'\bvarname\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)', line)
55 | if m:
56 | return m.group(1)
57 |
58 | def print_numpy(x, val=True, shp=False):
59 | x = x.astype(np.float64)
60 | if shp:
61 | print('shape,', x.shape)
62 | if val:
63 | x = x.flatten()
64 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
65 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
66 |
67 |
68 | def mkdirs(paths):
69 | if isinstance(paths, list) and not isinstance(paths, str):
70 | for path in paths:
71 | mkdir(path)
72 | else:
73 | mkdir(paths)
74 |
75 |
76 | def mkdir(path):
77 | if not os.path.exists(path):
78 | os.makedirs(path)
79 |
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/mit_training.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 |
4 | import numpy as np
5 | import skimage.io
6 | import yaml
7 |
8 | from base import APC2016Base
9 |
10 |
11 | here = osp.dirname(osp.abspath(__file__))
12 |
13 |
14 | class APC2016mit_training(APC2016Base):
15 |
16 | dataset_dir = osp.expanduser('~/data/datasets/APC2016/training')
17 |
18 | def __init__(self, transform=False):
19 | self._transform = transform
20 | # drop by blacklist
21 | self._ids = []
22 | with open(osp.join(here, 'data/mit_training_blacklist.yaml')) as f:
23 | blacklist = yaml.load(f)
24 | for index, data_id in enumerate(self._get_ids()):
25 | if index in blacklist:
26 | print('WARNING: skipping index=%d data' % index)
27 | continue
28 | self._ids.append(data_id)
29 |
30 | def __len__(self):
31 | return len(self._ids)
32 |
33 | @classmethod
34 | def _get_ids(cls):
35 | for loc in ['shelf', 'tote']:
36 | loc_dir = osp.join(cls.dataset_dir, loc)
37 | for cls_id, cls_name in enumerate(cls.class_names):
38 | if cls_id == 0: # background
39 | continue
40 | cls_dir = osp.join(loc_dir, cls_name)
41 | scene_dir_empty = osp.join(cls_dir, 'scene-empty')
42 | for scene_dir in os.listdir(cls_dir):
43 | scene_dir = osp.join(cls_dir, scene_dir)
44 | for frame_id in xrange(0, 18):
45 | empty_file = osp.join(
46 | scene_dir_empty, 'frame-%06d.color.png' % frame_id)
47 | rgb_file = osp.join(
48 | scene_dir, 'frame-%06d.color.png' % frame_id)
49 | mask_file = osp.join(
50 | scene_dir, 'masks',
51 | 'frame-%06d.mask.png' % frame_id)
52 | if osp.exists(rgb_file) and osp.exists(mask_file):
53 | yield empty_file, rgb_file, mask_file, cls_id
54 |
55 | @staticmethod
56 | def _load_from_id(data_id):
57 | empty_file, rgb_file, mask_file, cls_id = data_id
58 | img = skimage.io.imread(rgb_file)
59 | img_empty = skimage.io.imread(empty_file)
60 | mask = skimage.io.imread(mask_file, as_grey=True) >= 0.5
61 | lbl = np.zeros(mask.shape, dtype=np.int32)
62 | lbl[mask] = cls_id
63 | img_empty[mask] = img[mask]
64 | return img_empty, lbl
65 |
66 | def __getitem__(self, index):
67 | data_id = self._ids[index]
68 | img, lbl = self._load_from_id(data_id)
69 | if self._transform:
70 | return self.transform(img, lbl)
71 | else:
72 | return img, lbl
73 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/net.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('../../python')
3 |
4 | import caffe
5 | from caffe import layers as L, params as P
6 | from caffe.coord_map import crop
7 |
8 | def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
9 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
10 | num_output=nout, pad=pad, group=group)
11 | return conv, L.ReLU(conv, in_place=True)
12 |
13 | def max_pool(bottom, ks, stride=1):
14 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
15 |
16 | def fcn(split):
17 | n = caffe.NetSpec()
18 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
19 | seed=1337)
20 | if split == 'train':
21 | pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
22 | pylayer = 'SBDDSegDataLayer'
23 | else:
24 | pydata_params['voc_dir'] = '../data/pascal/VOC2011'
25 | pylayer = 'VOCSegDataLayer'
26 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
27 | ntop=2, param_str=str(pydata_params))
28 |
29 | # the base net
30 | n.conv1, n.relu1 = conv_relu(n.data, 11, 96, stride=4, pad=100)
31 | n.pool1 = max_pool(n.relu1, 3, stride=2)
32 | n.norm1 = L.LRN(n.pool1, local_size=5, alpha=1e-4, beta=0.75)
33 | n.conv2, n.relu2 = conv_relu(n.norm1, 5, 256, pad=2, group=2)
34 | n.pool2 = max_pool(n.relu2, 3, stride=2)
35 | n.norm2 = L.LRN(n.pool2, local_size=5, alpha=1e-4, beta=0.75)
36 | n.conv3, n.relu3 = conv_relu(n.norm2, 3, 384, pad=1)
37 | n.conv4, n.relu4 = conv_relu(n.relu3, 3, 384, pad=1, group=2)
38 | n.conv5, n.relu5 = conv_relu(n.relu4, 3, 256, pad=1, group=2)
39 | n.pool5 = max_pool(n.relu5, 3, stride=2)
40 |
41 | # fully conv
42 | n.fc6, n.relu6 = conv_relu(n.pool5, 6, 4096)
43 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
44 | n.fc7, n.relu7 = conv_relu(n.drop6, 1, 4096)
45 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
46 |
47 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
48 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
49 | n.upscore = L.Deconvolution(n.score_fr,
50 | convolution_param=dict(num_output=21, kernel_size=63, stride=32,
51 | bias_term=False),
52 | param=[dict(lr_mult=0)])
53 | n.score = crop(n.upscore, n.data)
54 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
55 | loss_param=dict(normalize=True, ignore_label=255))
56 |
57 | return n.to_proto()
58 |
59 | def make_net():
60 | with open('train.prototxt', 'w') as f:
61 | f.write(str(fcn('train')))
62 |
63 | with open('val.prototxt', 'w') as f:
64 | f.write(str(fcn('seg11valid')))
65 |
66 | if __name__ == '__main__':
67 | make_net()
68 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/surgery.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import caffe
3 | import numpy as np
4 |
5 | def transplant(new_net, net, suffix=''):
6 | """
7 | Transfer weights by copying matching parameters, coercing parameters of
8 | incompatible shape, and dropping unmatched parameters.
9 |
10 | The coercion is useful to convert fully connected layers to their
11 | equivalent convolutional layers, since the weights are the same and only
12 | the shapes are different. In particular, equivalent fully connected and
13 | convolution layers have shapes O x I and O x I x H x W respectively for O
14 | outputs channels, I input channels, H kernel height, and W kernel width.
15 |
16 | Both the `net` and `new_net` arguments must be instantiated `caffe.Net`s.
17 | """
18 | for p in net.params:
19 | p_new = p + suffix
20 | if p_new not in new_net.params:
21 | print 'dropping', p
22 | continue
23 | for i in range(len(net.params[p])):
24 | if i > (len(new_net.params[p_new]) - 1):
25 | print 'dropping', p, i
26 | break
27 | if net.params[p][i].data.shape != new_net.params[p_new][i].data.shape:
28 | print 'coercing', p, i, 'from', net.params[p][i].data.shape, 'to', new_net.params[p_new][i].data.shape
29 | else:
30 | print 'copying', p, ' -> ', p_new, i
31 | new_net.params[p_new][i].data.flat = net.params[p][i].data.flat
32 |
33 | def upsample_filt(size):
34 | """
35 | Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size.
36 | """
37 | factor = (size + 1) // 2
38 | if size % 2 == 1:
39 | center = factor - 1
40 | else:
41 | center = factor - 0.5
42 | og = np.ogrid[:size, :size]
43 | return (1 - abs(og[0] - center) / factor) * \
44 | (1 - abs(og[1] - center) / factor)
45 |
46 | def interp(net, layers):
47 | """
48 | Set weights of each layer in layers to bilinear kernels for interpolation.
49 | """
50 | for l in layers:
51 | m, k, h, w = net.params[l][0].data.shape
52 | if m != k and k != 1:
53 | print 'input + output channels need to be the same or |output| == 1'
54 | raise
55 | if h != w:
56 | print 'filters need to be square'
57 | raise
58 | filt = upsample_filt(h)
59 | net.params[l][0].data[range(m), range(k), :, :] = filt
60 |
61 | def expand_score(new_net, new_layer, net, layer):
62 | """
63 | Transplant an old score layer's parameters, with k classes, into a new
64 | score layer with k' > k classes s.t. the first k outputs are the old classes.
65 | """
66 | old_cl = net.params[layer][0].num
67 | new_net.params[new_layer][0].data[:old_cl][...] = net.params[layer][0].data
68 | new_net.params[new_layer][1].data[0,0,0,:old_cl][...] = net.params[layer][1].data
69 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split, tops):
15 | n = caffe.NetSpec()
16 | n.data, n.label = L.Python(module='nyud_layers',
17 | layer='NYUDSegDataLayer', ntop=2,
18 | param_str=str(dict(nyud_dir='../data/nyud', split=split,
19 | tops=tops, seed=1337)))
20 |
21 | # the base net
22 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
23 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
24 | n.pool1 = max_pool(n.relu1_2)
25 |
26 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
27 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
28 | n.pool2 = max_pool(n.relu2_2)
29 |
30 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
31 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
32 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
33 | n.pool3 = max_pool(n.relu3_3)
34 |
35 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
36 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
37 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
38 | n.pool4 = max_pool(n.relu4_3)
39 |
40 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
41 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
42 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
43 | n.pool5 = max_pool(n.relu5_3)
44 |
45 | # fully conv
46 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
47 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
48 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
49 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
50 |
51 | n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0,
52 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
53 | n.upscore = L.Deconvolution(n.score_fr,
54 | convolution_param=dict(num_output=40, kernel_size=64, stride=32,
55 | bias_term=False),
56 | param=[dict(lr_mult=0)])
57 | n.score = crop(n.upscore, n.data)
58 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
59 | loss_param=dict(normalize=False, ignore_label=255))
60 |
61 | return n.to_proto()
62 |
63 | def make_net():
64 | tops = ['hha', 'label']
65 | with open('trainval.prototxt', 'w') as f:
66 | f.write(str(fcn('trainval', tops)))
67 |
68 | with open('test.prototxt', 'w') as f:
69 | f.write(str(fcn('test', tops)))
70 |
71 | if __name__ == '__main__':
72 | make_net()
73 |
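The numbers in these FCN-32s definitions fit together: the extra `pad=100` on `conv1_1` gives enough spatial slack, the `kernel_size=64, stride=32` Deconvolution undoes the five 2x poolings, and `crop` trims the upsampled score map back to the input size. A rough size-arithmetic sketch using the standard Caffe output-size formulas (my own illustration, not code from this repository):

```python
import math

def conv_out(n, k, pad=0, stride=1):
    # Caffe convolution output size
    return (n + 2 * pad - k) // stride + 1

def pool_out(n, k=2, stride=2):
    # Caffe pooling rounds up (ceil)
    return int(math.ceil((n - k) / float(stride))) + 1

def deconv_out(n, k, stride):
    # Caffe deconvolution (transposed convolution) output size with pad=0
    return stride * (n - 1) + k

n = 500                        # example input height/width
n = conv_out(n, 3, pad=100)    # conv1_1 with pad=100 -> 698
for _ in range(5):             # the 3x3/pad=1 convs keep the size; pool1..pool5 halve it
    n = pool_out(n)            # 349, 175, 88, 44, 22
n = conv_out(n, 7)             # fc6 (7x7, pad=0) -> 16; fc7/score_fr are 1x1
up = deconv_out(n, 64, 32)     # upscore -> 544
print(n, up)                   # 16 544; crop() then trims 544 back to the 500-pixel input
```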
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split, tops):
15 | n = caffe.NetSpec()
16 | n.data, n.label = L.Python(module='nyud_layers',
17 | layer='NYUDSegDataLayer', ntop=2,
18 | param_str=str(dict(nyud_dir='../data/nyud', split=split,
19 | tops=tops, seed=1337)))
20 |
21 | # the base net
22 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
23 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
24 | n.pool1 = max_pool(n.relu1_2)
25 |
26 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
27 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
28 | n.pool2 = max_pool(n.relu2_2)
29 |
30 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
31 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
32 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
33 | n.pool3 = max_pool(n.relu3_3)
34 |
35 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
36 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
37 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
38 | n.pool4 = max_pool(n.relu4_3)
39 |
40 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
41 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
42 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
43 | n.pool5 = max_pool(n.relu5_3)
44 |
45 | # fully conv
46 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
47 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
48 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
49 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
50 |
51 | n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0,
52 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
53 | n.upscore = L.Deconvolution(n.score_fr,
54 | convolution_param=dict(num_output=40, kernel_size=64, stride=32,
55 | bias_term=False),
56 | param=[dict(lr_mult=0)])
57 | n.score = crop(n.upscore, n.data)
58 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
59 | loss_param=dict(normalize=False, ignore_label=255))
60 |
61 | return n.to_proto()
62 |
63 | def make_net():
64 | tops = ['color', 'label']
65 | with open('trainval.prototxt', 'w') as f:
66 | f.write(str(fcn('trainval', tops)))
67 |
68 | with open('test.prototxt', 'w') as f:
69 | f.write(str(fcn('test', tops)))
70 |
71 | if __name__ == '__main__':
72 | make_net()
73 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split):
15 | n = caffe.NetSpec()
16 | n.data, n.label = L.Python(module='pascalcontext_layers',
17 | layer='PASCALContextSegDataLayer', ntop=2,
18 | param_str=str(dict(voc_dir='../../data/pascal',
19 | context_dir='../../data/pascal-context', split=split,
20 | seed=1337)))
21 |
22 | # the base net
23 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
24 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
25 | n.pool1 = max_pool(n.relu1_2)
26 |
27 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
28 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
29 | n.pool2 = max_pool(n.relu2_2)
30 |
31 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
32 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
33 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
34 | n.pool3 = max_pool(n.relu3_3)
35 |
36 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
37 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
38 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
39 | n.pool4 = max_pool(n.relu4_3)
40 |
41 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
42 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
43 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
44 | n.pool5 = max_pool(n.relu5_3)
45 |
46 | # fully conv
47 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
48 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
49 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
50 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
51 |
52 | n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0,
53 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
54 | n.upscore = L.Deconvolution(n.score_fr,
55 | convolution_param=dict(num_output=60, kernel_size=64, stride=32,
56 | bias_term=False),
57 | param=[dict(lr_mult=0)])
58 | n.score = crop(n.upscore, n.data)
59 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
60 | loss_param=dict(normalize=False, ignore_label=255))
61 |
62 |
63 | return n.to_proto()
64 |
65 | def make_net():
66 | with open('train.prototxt', 'w') as f:
67 | f.write(str(fcn('train')))
68 |
69 | with open('val.prototxt', 'w') as f:
70 | f.write(str(fcn('val')))
71 |
72 | if __name__ == '__main__':
73 | make_net()
74 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split, tops):
15 | n = caffe.NetSpec()
16 | n.color, n.depth, n.label = L.Python(module='nyud_layers',
17 | layer='NYUDSegDataLayer', ntop=3,
18 | param_str=str(dict(nyud_dir='../data/nyud', split=split,
19 | tops=tops, seed=1337)))
20 | n.data = L.Concat(n.color, n.depth)
21 |
22 | # the base net
23 | n.conv1_1_bgrd, n.relu1_1 = conv_relu(n.data, 64, pad=100)
24 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
25 | n.pool1 = max_pool(n.relu1_2)
26 |
27 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
28 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
29 | n.pool2 = max_pool(n.relu2_2)
30 |
31 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
32 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
33 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
34 | n.pool3 = max_pool(n.relu3_3)
35 |
36 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
37 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
38 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
39 | n.pool4 = max_pool(n.relu4_3)
40 |
41 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
42 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
43 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
44 | n.pool5 = max_pool(n.relu5_3)
45 |
46 | # fully conv
47 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
48 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
49 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
50 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
51 |
52 | n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0,
53 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
54 | n.upscore = L.Deconvolution(n.score_fr,
55 | convolution_param=dict(num_output=40, kernel_size=64, stride=32,
56 | bias_term=False),
57 | param=[dict(lr_mult=0)])
58 | n.score = crop(n.upscore, n.data)
59 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
60 | loss_param=dict(normalize=False, ignore_label=255))
61 |
62 | return n.to_proto()
63 |
64 | def make_net():
65 | tops = ['color', 'depth', 'label']
66 | with open('trainval.prototxt', 'w') as f:
67 | f.write(str(fcn('trainval', tops)))
68 |
69 | with open('test.prototxt', 'w') as f:
70 | f.write(str(fcn('test', tops)))
71 |
72 | if __name__ == '__main__':
73 | make_net()
74 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split):
15 | n = caffe.NetSpec()
16 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
17 | seed=1337)
18 | if split == 'train':
19 | pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
20 | pylayer = 'SBDDSegDataLayer'
21 | else:
22 | pydata_params['voc_dir'] = '../data/pascal/VOC2011'
23 | pylayer = 'VOCSegDataLayer'
24 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
25 | ntop=2, param_str=str(pydata_params))
26 |
27 | # the base net
28 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
29 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
30 | n.pool1 = max_pool(n.relu1_2)
31 |
32 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
33 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
34 | n.pool2 = max_pool(n.relu2_2)
35 |
36 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
37 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
38 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
39 | n.pool3 = max_pool(n.relu3_3)
40 |
41 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
42 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
43 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
44 | n.pool4 = max_pool(n.relu4_3)
45 |
46 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
47 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
48 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
49 | n.pool5 = max_pool(n.relu5_3)
50 |
51 | # fully conv
52 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
53 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
54 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
55 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
56 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
57 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
58 | n.upscore = L.Deconvolution(n.score_fr,
59 | convolution_param=dict(num_output=21, kernel_size=64, stride=32,
60 | bias_term=False),
61 | param=[dict(lr_mult=0)])
62 | n.score = crop(n.upscore, n.data)
63 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
64 | loss_param=dict(normalize=False, ignore_label=255))
65 |
66 | return n.to_proto()
67 |
68 | def make_net():
69 | with open('train.prototxt', 'w') as f:
70 | f.write(str(fcn('train')))
71 |
72 | with open('val.prototxt', 'w') as f:
73 | f.write(str(fcn('seg11valid')))
74 |
75 | if __name__ == '__main__':
76 | make_net()
77 |
--------------------------------------------------------------------------------
/LICENSE1:
--------------------------------------------------------------------------------
1 | Copyright (c) 2018, Yuankai Huo
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 |
25 |
26 | --------------------------- LICENSE FOR EssNet --------------------------------
27 | BSD License
28 |
29 | For EssNet software
30 | Copyright (c) 2018, Yuankai Huo
31 | All rights reserved.
32 |
33 | Redistribution and use in source and binary forms, with or without
34 | modification, are permitted provided that the following conditions are met:
35 |
36 | * Redistributions of source code must retain the above copyright notice, this
37 | list of conditions and the following disclaimer.
38 |
39 | * Redistributions in binary form must reproduce the above copyright notice,
40 | this list of conditions and the following disclaimer in the documentation
41 | and/or other materials provided with the distribution.
42 |
43 |
44 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
45 |
46 | Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
47 |
48 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
49 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split):
15 | n = caffe.NetSpec()
16 | n.data, n.label = L.Python(module='pascalcontext_layers',
17 | layer='PASCALContextSegDataLayer', ntop=2,
18 | param_str=str(dict(voc_dir='../../data/pascal',
19 | context_dir='../../data/pascal-context', split=split,
20 | seed=1337)))
21 |
22 | # the base net
23 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
24 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
25 | n.pool1 = max_pool(n.relu1_2)
26 |
27 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
28 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
29 | n.pool2 = max_pool(n.relu2_2)
30 |
31 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
32 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
33 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
34 | n.pool3 = max_pool(n.relu3_3)
35 |
36 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
37 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
38 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
39 | n.pool4 = max_pool(n.relu4_3)
40 |
41 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
42 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
43 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
44 | n.pool5 = max_pool(n.relu5_3)
45 |
46 | # fully conv
47 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
48 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
49 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
50 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
51 |
52 | n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0,
53 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
54 | n.upscore2 = L.Deconvolution(n.score_fr,
55 | convolution_param=dict(num_output=60, kernel_size=4, stride=2,
56 | bias_term=False),
57 | param=[dict(lr_mult=0)])
58 |
59 | n.score_pool4 = L.Convolution(n.pool4, num_output=60, kernel_size=1, pad=0,
60 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
61 | n.score_pool4c = crop(n.score_pool4, n.upscore2)
62 | n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
63 | operation=P.Eltwise.SUM)
64 | n.upscore16 = L.Deconvolution(n.fuse_pool4,
65 | convolution_param=dict(num_output=60, kernel_size=32, stride=16,
66 | bias_term=False),
67 | param=[dict(lr_mult=0)])
68 |
69 | n.score = crop(n.upscore16, n.data)
70 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
71 | loss_param=dict(normalize=False, ignore_label=255))
72 |
73 | return n.to_proto()
74 |
75 | def make_net():
76 | with open('train.prototxt', 'w') as f:
77 | f.write(str(fcn('train')))
78 |
79 | with open('val.prototxt', 'w') as f:
80 | f.write(str(fcn('val')))
81 |
82 | if __name__ == '__main__':
83 | make_net()
84 |
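Unlike the 32-stride nets above, this FCN-16s variant upsamples in two stages and fuses a skip connection from `pool4`: `score_fr` is upsampled 2x (`upscore2`), summed element-wise with a cropped 1x1 scoring of `pool4`, and the fused map is then upsampled the remaining 16x. A toy NumPy sketch of just the crop-and-sum step (offsets are simply centered here for illustration; Caffe's `coord_map` computes the exact offset):

```python
import numpy as np

def center_crop(a, ref):
    # crop a (C, H, W) array `a` to the spatial size of `ref`
    _, h, w = ref.shape
    dh = (a.shape[1] - h) // 2
    dw = (a.shape[2] - w) // 2
    return a[:, dh:dh + h, dw:dw + w]

upscore2 = np.random.randn(60, 46, 46)     # coarse stream after the 2x deconvolution
score_pool4 = np.random.randn(60, 50, 50)  # 1x1 scores on pool4 (slightly larger)

score_pool4c = center_crop(score_pool4, upscore2)  # crop(n.score_pool4, n.upscore2)
fuse_pool4 = upscore2 + score_pool4c               # Eltwise SUM
print(fuse_pool4.shape)  # (60, 46, 46) -- then deconvolved 16x and cropped to the input
```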
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split):
15 | n = caffe.NetSpec()
16 | n.data, n.sem, n.geo = L.Python(module='siftflow_layers',
17 | layer='SIFTFlowSegDataLayer', ntop=3,
18 | param_str=str(dict(siftflow_dir='../data/sift-flow',
19 | split=split, seed=1337)))
20 |
21 | # the base net
22 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
23 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
24 | n.pool1 = max_pool(n.relu1_2)
25 |
26 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
27 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
28 | n.pool2 = max_pool(n.relu2_2)
29 |
30 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
31 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
32 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
33 | n.pool3 = max_pool(n.relu3_3)
34 |
35 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
36 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
37 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
38 | n.pool4 = max_pool(n.relu4_3)
39 |
40 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
41 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
42 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
43 | n.pool5 = max_pool(n.relu5_3)
44 |
45 | # fully conv
46 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
47 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
48 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
49 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
50 |
51 | n.score_fr_sem = L.Convolution(n.drop7, num_output=33, kernel_size=1, pad=0,
52 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
53 | n.upscore_sem = L.Deconvolution(n.score_fr_sem,
54 | convolution_param=dict(num_output=33, kernel_size=64, stride=32,
55 | bias_term=False),
56 | param=[dict(lr_mult=0)])
57 | n.score_sem = crop(n.upscore_sem, n.data)
58 | # loss to make score happy (o.w. loss_sem)
59 | n.loss = L.SoftmaxWithLoss(n.score_sem, n.sem,
60 | loss_param=dict(normalize=False, ignore_label=255))
61 |
62 | n.score_fr_geo = L.Convolution(n.drop7, num_output=3, kernel_size=1, pad=0,
63 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
64 | n.upscore_geo = L.Deconvolution(n.score_fr_geo,
65 | convolution_param=dict(num_output=3, kernel_size=64, stride=32,
66 | bias_term=False),
67 | param=[dict(lr_mult=0)])
68 | n.score_geo = crop(n.upscore_geo, n.data)
69 | n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo,
70 | loss_param=dict(normalize=False, ignore_label=255))
71 |
72 | return n.to_proto()
73 |
74 | def make_net():
75 | with open('trainval.prototxt', 'w') as f:
76 | f.write(str(fcn('trainval')))
77 |
78 | with open('test.prototxt', 'w') as f:
79 | f.write(str(fcn('test')))
80 |
81 | if __name__ == '__main__':
82 | make_net()
83 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split):
15 | n = caffe.NetSpec()
16 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
17 | seed=1337)
18 | if split == 'train':
19 | pydata_params['sbdd_dir'] = '../../data/sbdd/dataset'
20 | pylayer = 'SBDDSegDataLayer'
21 | else:
22 | pydata_params['voc_dir'] = '../../data/pascal/VOC2011'
23 | pylayer = 'VOCSegDataLayer'
24 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
25 | ntop=2, param_str=str(pydata_params))
26 |
27 | # the base net
28 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
29 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
30 | n.pool1 = max_pool(n.relu1_2)
31 |
32 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
33 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
34 | n.pool2 = max_pool(n.relu2_2)
35 |
36 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
37 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
38 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
39 | n.pool3 = max_pool(n.relu3_3)
40 |
41 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
42 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
43 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
44 | n.pool4 = max_pool(n.relu4_3)
45 |
46 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
47 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
48 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
49 | n.pool5 = max_pool(n.relu5_3)
50 |
51 | # fully conv
52 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
53 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
54 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
55 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
56 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
57 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
58 | n.upscore2 = L.Deconvolution(n.score_fr,
59 | convolution_param=dict(num_output=21, kernel_size=4, stride=2,
60 | bias_term=False),
61 | param=[dict(lr_mult=0)])
62 |
63 | n.score_pool4 = L.Convolution(n.pool4, num_output=21, kernel_size=1, pad=0,
64 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
65 | n.score_pool4c = crop(n.score_pool4, n.upscore2)
66 | n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
67 | operation=P.Eltwise.SUM)
68 | n.upscore16 = L.Deconvolution(n.fuse_pool4,
69 | convolution_param=dict(num_output=21, kernel_size=32, stride=16,
70 | bias_term=False),
71 | param=[dict(lr_mult=0)])
72 |
73 | n.score = crop(n.upscore16, n.data)
74 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
75 | loss_param=dict(normalize=False, ignore_label=255))
76 |
77 | return n.to_proto()
78 |
79 | def make_net():
80 | with open('train.prototxt', 'w') as f:
81 | f.write(str(fcn('train')))
82 |
83 | with open('val.prototxt', 'w') as f:
84 | f.write(str(fcn('seg11valid')))
85 |
86 | if __name__ == '__main__':
87 | make_net()
88 |
--------------------------------------------------------------------------------
/LICENSE2:
--------------------------------------------------------------------------------
1 | Copyright (c) 2017, Jun-Yan Zhu and Taesung Park
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 |
25 |
26 | --------------------------- LICENSE FOR pix2pix --------------------------------
27 | BSD License
28 |
29 | For pix2pix software
30 | Copyright (c) 2016, Phillip Isola and Jun-Yan Zhu
31 | All rights reserved.
32 |
33 | Redistribution and use in source and binary forms, with or without
34 | modification, are permitted provided that the following conditions are met:
35 |
36 | * Redistributions of source code must retain the above copyright notice, this
37 | list of conditions and the following disclaimer.
38 |
39 | * Redistributions in binary form must reproduce the above copyright notice,
40 | this list of conditions and the following disclaimer in the documentation
41 | and/or other materials provided with the distribution.
42 |
43 | ----------------------------- LICENSE FOR DCGAN --------------------------------
44 | BSD License
45 |
46 | For dcgan.torch software
47 |
48 | Copyright (c) 2015, Facebook, Inc. All rights reserved.
49 |
50 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
51 |
52 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
53 |
54 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
55 |
56 | Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
57 |
58 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59 |
--------------------------------------------------------------------------------
/data/yh_seg_spleenonly_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import torchvision.transforms as transforms
3 | from data.base_dataset import BaseDataset, get_transform
4 | from PIL import Image
5 | import torch
6 | import random
7 | import random_crop_yh
8 |
9 | class yhSegDatasetSpleenOnly(BaseDataset):
10 | def initialize(self, opt):
11 | self.opt = opt
12 | self.root = opt.dataroot
13 | self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A')
14 | self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B')
15 |
16 | self.dir_A = opt.raw_MRI_dir
17 | self.dir_B = opt.raw_CT_dir
18 | self.dir_Seg = opt.raw_MRI_seg_dir
19 |
20 | self.A_paths = opt.imglist_MRI
21 | self.B_paths = opt.imglist_CT
22 |
23 | self.A_size = len(self.A_paths)
24 | self.B_size = len(self.B_paths)
25 | if not self.opt.isTrain:
26 | self.skipcrop = True
27 | else:
28 | self.skipcrop = False
29 | # self.transform = get_transform(opt)
30 |
31 | if self.skipcrop:
32 | osize = [opt.fineSize, opt.fineSize]
33 | else:
34 | osize = [opt.loadSize, opt.loadSize]
35 | transform_list = []
36 | transform_list.append(transforms.Scale(osize, Image.BICUBIC))
37 | self.transforms_scale = transforms.Compose(transform_list)
38 |
39 | transform_list = []
40 | transform_list.append(transforms.Scale(osize, Image.NEAREST))
41 | self.transforms_seg_scale = transforms.Compose(transform_list)
42 |
43 | transform_list = []
44 | transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize))
45 | self.transforms_crop = transforms.Compose(transform_list)
46 |
47 | transform_list = []
48 | transform_list.append(transforms.ToTensor())
49 | self.transforms_toTensor = transforms.Compose(transform_list)
50 |
51 | transform_list = []
52 | transform_list.append(transforms.Normalize((0.5, 0.5, 0.5),
53 | (0.5, 0.5, 0.5)))
54 | self.transforms_normalize = transforms.Compose(transform_list)
55 |
56 |
57 | def __getitem__(self, index):
58 | index_A = index % self.A_size
59 | A_path = self.A_paths[index_A]
60 | Seg_path = A_path.replace(self.dir_A,self.dir_Seg)
61 | Seg_path = Seg_path.replace('_rawimg','_organlabel')
62 |
63 | index_B = random.randint(0, self.B_size - 1)
64 | B_path = self.B_paths[index_B]
65 | A_img = Image.open(A_path).convert('L')
66 | Seg_img = Image.open(Seg_path).convert('I')
67 | B_img = Image.open(B_path).convert('L')
68 |
69 | A_img = self.transforms_scale(A_img)
70 | B_img = self.transforms_scale(B_img)
71 | Seg_img = self.transforms_seg_scale(Seg_img)
72 |
73 | if not self.skipcrop:
74 | [A_img,Seg_img] = self.transforms_crop([A_img, Seg_img])
75 | [B_img] = self.transforms_crop([B_img])
76 |
77 | A_img = self.transforms_toTensor(A_img)
78 | B_img = self.transforms_toTensor(B_img)
79 | Seg_img = self.transforms_toTensor(Seg_img)
80 |
81 | A_img = self.transforms_normalize(A_img)
82 | B_img = self.transforms_normalize(B_img)
83 |
84 | Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize)
85 | Seg_imgs[0, :, :] = Seg_img == 0
86 | Seg_imgs[1, :, :] = Seg_img == 1
87 |
88 |
89 | return {'A': A_img, 'B': B_img, 'Seg': Seg_imgs, 'Seg_one': Seg_img,
90 | 'A_paths': A_path, 'B_paths': B_path, 'Seg_paths':Seg_path}
91 |
92 |
93 | def __len__(self):
94 | return max(self.A_size, self.B_size)
95 |
96 | def name(self):
97 | return 'UnalignedDataset'
98 |
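The end of `__getitem__` expands the integer label map into a one-hot tensor with `output_nc_seg` channels (two here: background and spleen). A minimal stand-alone sketch of that encoding, written as a loop over channels (my own helper for illustration, not part of the repository):

```python
import torch

def to_onehot(seg, num_classes):
    # (H, W) integer label map -> (num_classes, H, W) one-hot tensor,
    # the loop form of the "Seg_imgs[c, :, :] = Seg_img == c" lines above
    h, w = seg.size()
    onehot = torch.zeros(num_classes, h, w)
    for c in range(num_classes):
        onehot[c] = (seg == c).float()
    return onehot

seg = torch.LongTensor([[0, 0, 1],
                        [0, 1, 1]])
print(to_onehot(seg, 2))  # channel 0 marks background (0), channel 1 marks spleen (1)
```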
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SynSeg-Net
2 | (End-to-end Synthesis and Segmentation Network)
3 |
4 | ## Adversarial Synthesis Learning Enables Segmentation Without Target Modality Ground Truth
5 |
6 | This is our ongoing PyTorch implementation of end-to-end synthesis and segmentation without target-modality ground truth.
7 | The ISBI 2018 paper is available on [arXiv](https://arxiv.org/abs/1712.07695).
8 | A demo video is available on [YouTube](https://youtu.be/LTTh1WWPZ1o).
9 |
10 | The code was written by [Yuankai Huo](https://sites.google.com/site/yuankaihuo/) and developed upon [CycleGAN Torch](https://github.com/junyanz/CycleGAN).
11 |
12 |
13 |
14 |
15 |
16 |
17 | If you use this code for your research, please cite:
18 |
19 | Yuankai Huo, Zhoubing Xu, Shunxing Bao, Albert Assad, Richard G. Abramson, Bennett A. Landman. [Adversarial Synthesis Learning Enables Segmentation Without Target Modality Ground Truth.](https://arxiv.org/abs/1712.07695) In [arXiv](https://arxiv.org/abs/1712.07695) (2017).
20 |
21 | or
22 |
23 | Yuankai Huo, Zhoubing Xu, Hyeonsoo Moon, Shunxing Bao, Albert Assad, Tamara K. Moyo, Michael R. Savona, Richard G. Abramson, and Bennett A. Landman. [SynSeg-Net: Synthetic Segmentation Without Target Modality Ground Truth.](https://arxiv.org/abs/1810.06498) IEEE Transactions on Medical Imaging (2018).
24 |
25 |
26 |
27 | ## Prerequisites
28 | - Linux or macOS
29 | - Python 2
30 | - CPU or NVIDIA GPU + CUDA CuDNN
31 | PyTorch 0.2
32 |
33 | ## Training Data and Testing Data
34 | We used MRI and CT 2D slices (coronal view), together with MRI segmentations, as training data.
35 | We used CT 2D slices (coronal view) as testing data.
36 | The data organization can be seen in the txt files in the `sublist` directory.
37 |
38 | ## Training
39 | - Train the model
40 | ```bash
41 | python train_yh.py --dataroot ./datasets/yh --name yh_cyclegan_imgandseg --batchSize 4 --model cycle_seg --pool_size 50 --no_dropout --yh_run_model Train --dataset_mode yh_seg --input_nc 1 --seg_norm CrossEntropy --output_nc 1 --output_nc_seg 7 --checkpoints_dir /home-local/Cycle_Deep/Checkpoints/ --test_seg_output_dir /home-local/Cycle_Deep/Output/ --display_id 0
42 | ```
43 | - `--name` the experiment name (used to name the checkpoint sub-directory)
44 | `--model` "cycle_seg" selects the EssNet model
45 | `--yh_run_model` "Train" runs training
46 | `--output_nc_seg` defines the number of segmentation labels
47 | `--checkpoints_dir` where to save checkpoints (the model)
48 | `--test_seg_output_dir` where to save the test segmentations
49 |
50 | ## Testing
51 | - Test the synthesis
52 | ```bash
53 | python train_yh.py --dataroot ./datasets/yh --name yh_cyclegan_imgandseg --batchSize 4 --model cycle_gan --pool_size 50 --no_dropout --yh_run_model Test --dataset_mode yh --input_nc 1 --output_nc 1 --checkpoints_dir /home-local/Cycle_Deep/Checkpoints/ --test_seg_output_dir /home-local/Cycle_Deep/Output/ --which_epoch 50
54 | ```
55 |
56 | - Test the segmentation
57 | ```bash
58 | python train_yh.py --dataroot ./datasets/yh --name yh_cyclegan_imgandseg --batchSize 4 --model test_seg --pool_size 50 --no_dropout --yh_run_model TestSeg --dataset_mode yh_test_seg --input_nc 1 --output_nc 1 --checkpoints_dir /home-local/Cycle_Deep/Checkpoints/ --test_seg_output_dir /home-local/Cycle_Deep/Output/ --which_epoch 50
59 | ```
60 | - `--name` must match the experiment name used for training
61 | `--which_epoch` which training epoch's checkpoint to load
62 |
63 |
64 | ## Citation
65 | If you use this code for your research, please cite our papers.
66 | ```
67 | @article{huo2017adversarial,
68 | title={Adversarial Synthesis Learning Enables Segmentation Without Target Modality Ground Truth},
69 | author={Huo, Yuankai and Xu, Zhoubing and Bao, Shunxing and Assad, Albert and Abramson, Richard G and Landman, Bennett A},
70 | journal={arXiv preprint arXiv:1712.07695},
71 | year={2017}
72 | }
73 | ```
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/torchsrc/datasets/apc/mit_benchmark.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import os
3 | import os.path as osp
4 |
5 | import numpy as np
6 | import scipy.misc
7 | from sklearn.model_selection import train_test_split
8 |
9 | from base import APC2016Base
10 |
11 |
12 | def ids_from_scene_dir(scene_dir, empty_scene_dir):
13 | for i_frame in itertools.count():
14 | empty_file = osp.join(
15 | empty_scene_dir, 'frame-{:06}.color.png'.format(i_frame))
16 | rgb_file = osp.join(
17 | scene_dir, 'frame-{:06}.color.png'.format(i_frame))
18 | segm_file = osp.join(
19 | scene_dir, 'segm/frame-{:06}.segm.png'.format(i_frame))
20 | if not (osp.exists(rgb_file) and osp.exists(segm_file)):
21 | break
22 | data_id = (empty_file, rgb_file, segm_file)
23 | yield data_id
24 |
25 |
26 | def bin_id_from_scene_dir(scene_dir):
27 | caminfo = open(osp.join(scene_dir, 'cam.info.txt')).read()
28 | loc = caminfo.splitlines()[0].split(': ')[-1]
29 | if loc == 'shelf':
30 | bin_id = caminfo.splitlines()[1][-1]
31 | else:
32 | bin_id = 'tote'
33 | return bin_id
34 |
35 |
36 | class APC2016mit_benchmark(APC2016Base):
37 |
38 | def __init__(self, split='train', transform=False):
39 | assert split in ['train', 'valid', 'all']
40 | self.split = split
41 | self._transform = transform
42 | self.dataset_dir = osp.expanduser('~/data/datasets/APC2016/benchmark')
43 | data_ids = self._get_ids()
44 | ids_train, ids_valid = train_test_split(
45 | data_ids, test_size=0.25, random_state=1234)
46 | self._ids = {'train': ids_train, 'valid': ids_valid, 'all': data_ids}
47 |
48 | def __len__(self):
49 | return len(self._ids[self.split])
50 |
51 | def _get_ids_from_loc_dir(self, env, loc_dir):
52 | assert env in ('office', 'warehouse')
53 | loc = osp.basename(loc_dir)
54 | data_ids = []
55 | for scene_dir in os.listdir(loc_dir):
56 | scene_dir = osp.join(loc_dir, scene_dir)
57 | bin_id = bin_id_from_scene_dir(scene_dir)
58 | empty_dir = osp.join(
59 | self.dataset_dir, env, 'empty', loc, 'scene-{}'.format(bin_id))
60 | data_ids += list(ids_from_scene_dir(scene_dir, empty_dir))
61 | return data_ids
62 |
63 | def _get_ids(self):
64 | data_ids = []
65 | # office
66 | contain_dir = osp.join(self.dataset_dir, 'office/test')
67 | for loc in ['shelf', 'tote']:
68 | loc_dir = osp.join(contain_dir, loc)
69 | data_ids += self._get_ids_from_loc_dir('office', loc_dir)
70 | # warehouse
71 | contain_dir = osp.join(self.dataset_dir, 'warehouse')
72 | for sub in ['practice', 'competition']:
73 | sub_contain_dir = osp.join(contain_dir, sub)
74 | for loc in ['shelf', 'tote']:
75 | loc_dir = osp.join(sub_contain_dir, loc)
76 | data_ids += self._get_ids_from_loc_dir('warehouse', loc_dir)
77 | return data_ids
78 |
79 | def _load_from_id(self, data_id):
80 | empty_file, rgb_file, segm_file = data_id
81 | img = scipy.misc.imread(rgb_file, mode='RGB')
82 | img_empty = scipy.misc.imread(empty_file, mode='RGB')
83 | # Label values are stored multiplied by 6:
84 | # ex) 0: 0/6=0 (background), 54: 54/6=9 (dasani_bottle_water)
85 | lbl = scipy.misc.imread(segm_file, mode='L') / 6
86 | lbl = lbl.astype(np.int32)
87 | img_empty[lbl > 0] = img[lbl > 0]
88 | return img_empty, lbl
89 |
90 | def __getitem__(self, index):
91 | data_id = self._ids[self.split][index]
92 | img, lbl = self._load_from_id(data_id)
93 | if self._transform:
94 | return self.transform(img, lbl)
95 | else:
96 | return img, lbl
97 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split):
15 | n = caffe.NetSpec()
16 | n.data, n.label = L.Python(module='pascalcontext_layers',
17 | layer='PASCALContextSegDataLayer', ntop=2,
18 | param_str=str(dict(voc_dir='../../data/pascal',
19 | context_dir='../../data/pascal-context', split=split,
20 | seed=1337)))
21 |
22 | # the base net
23 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
24 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
25 | n.pool1 = max_pool(n.relu1_2)
26 |
27 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
28 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
29 | n.pool2 = max_pool(n.relu2_2)
30 |
31 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
32 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
33 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
34 | n.pool3 = max_pool(n.relu3_3)
35 |
36 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
37 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
38 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
39 | n.pool4 = max_pool(n.relu4_3)
40 |
41 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
42 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
43 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
44 | n.pool5 = max_pool(n.relu5_3)
45 |
46 | # fully conv
47 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
48 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
49 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
50 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
51 |
52 | n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0,
53 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
54 | n.upscore2 = L.Deconvolution(n.score_fr,
55 | convolution_param=dict(num_output=60, kernel_size=4, stride=2,
56 | bias_term=False),
57 | param=[dict(lr_mult=0)])
58 |
59 | n.score_pool4 = L.Convolution(n.pool4, num_output=60, kernel_size=1, pad=0,
60 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
61 | n.score_pool4c = crop(n.score_pool4, n.upscore2)
62 | n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
63 | operation=P.Eltwise.SUM)
64 | n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
65 | convolution_param=dict(num_output=60, kernel_size=4, stride=2,
66 | bias_term=False),
67 | param=[dict(lr_mult=0)])
68 |
69 | n.score_pool3 = L.Convolution(n.pool3, num_output=60, kernel_size=1, pad=0,
70 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
71 | n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
72 | n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
73 | operation=P.Eltwise.SUM)
74 | n.upscore8 = L.Deconvolution(n.fuse_pool3,
75 | convolution_param=dict(num_output=60, kernel_size=16, stride=8,
76 | bias_term=False),
77 | param=[dict(lr_mult=0)])
78 |
79 | n.score = crop(n.upscore8, n.data)
80 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
81 | loss_param=dict(normalize=False, ignore_label=255))
82 |
83 | return n.to_proto()
84 |
85 | def make_net():
86 | with open('train.prototxt', 'w') as f:
87 | f.write(str(fcn('train')))
88 |
89 | with open('val.prototxt', 'w') as f:
90 | f.write(str(fcn('val')))
91 |
92 | if __name__ == '__main__':
93 | make_net()
94 |
--------------------------------------------------------------------------------
/util/get_data.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import tarfile
4 | import requests
5 | from warnings import warn
6 | from zipfile import ZipFile
7 | from bs4 import BeautifulSoup
8 | from os.path import abspath, isdir, join, basename
9 |
10 |
11 | class GetData(object):
12 | """
13 |
14 | Download CycleGAN or Pix2Pix Data.
15 |
16 | Args:
17 | technique : str
18 | One of: 'cyclegan' or 'pix2pix'.
19 | verbose : bool
20 | If True, print additional information.
21 |
22 | Examples:
23 | >>> from util.get_data import GetData
24 | >>> gd = GetData(technique='cyclegan')
25 | >>> new_data_path = gd.get(save_path='./datasets') # options will be displayed.
26 |
27 | """
28 |
29 | def __init__(self, technique='cyclegan', verbose=True):
30 | url_dict = {
31 | 'pix2pix': 'https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets',
32 | 'cyclegan': 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets'
33 | }
34 | self.url = url_dict.get(technique.lower())
35 | self._verbose = verbose
36 |
37 | def _print(self, text):
38 | if self._verbose:
39 | print(text)
40 |
41 | @staticmethod
42 | def _get_options(r):
43 | soup = BeautifulSoup(r.text, 'lxml')
44 | options = [h.text for h in soup.find_all('a', href=True)
45 | if h.text.endswith(('.zip', 'tar.gz'))]
46 | return options
47 |
48 | def _present_options(self):
49 | r = requests.get(self.url)
50 | options = self._get_options(r)
51 | print('Options:\n')
52 | for i, o in enumerate(options):
53 | print("{0}: {1}".format(i, o))
54 | choice = input("\nPlease enter the number of the "
55 | "dataset above you wish to download:")
56 | return options[int(choice)]
57 |
58 | def _download_data(self, dataset_url, save_path):
59 | if not isdir(save_path):
60 | os.makedirs(save_path)
61 |
62 | base = basename(dataset_url)
63 | temp_save_path = join(save_path, base)
64 |
65 | with open(temp_save_path, "wb") as f:
66 | r = requests.get(dataset_url)
67 | f.write(r.content)
68 |
69 | if base.endswith('.tar.gz'):
70 | obj = tarfile.open(temp_save_path)
71 | elif base.endswith('.zip'):
72 | obj = ZipFile(temp_save_path, 'r')
73 | else:
74 | raise ValueError("Unknown File Type: {0}.".format(base))
75 |
76 | self._print("Unpacking Data...")
77 | obj.extractall(save_path)
78 | obj.close()
79 | os.remove(temp_save_path)
80 |
81 | def get(self, save_path, dataset=None):
82 | """
83 |
84 | Download a dataset.
85 |
86 | Args:
87 | save_path : str
88 | A directory to save the data to.
89 | dataset : str, optional
90 | A specific dataset to download.
91 | Note: this must include the file extension.
92 | If None, options will be presented for you
93 | to choose from.
94 |
95 | Returns:
96 | save_path_full : str
97 | The absolute path to the downloaded data.
98 |
99 | """
100 | if dataset is None:
101 | selected_dataset = self._present_options()
102 | else:
103 | selected_dataset = dataset
104 |
105 | save_path_full = join(save_path, selected_dataset.split('.')[0])
106 |
107 | if isdir(save_path_full):
108 | warn("\n'{0}' already exists. Voiding Download.".format(
109 | save_path_full))
110 | else:
111 | self._print('Downloading Data...')
112 | url = "{0}/{1}".format(self.url, selected_dataset)
113 | self._download_data(url, save_path=save_path)
114 |
115 | return abspath(save_path_full)
116 |
--------------------------------------------------------------------------------
/data/yh_test_seg_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import torchvision.transforms as transforms
3 | from data.base_dataset import BaseDataset, get_transform
4 | from PIL import Image
5 | import torch
6 | import random
7 | import random_crop_yh
8 |
9 | class yhTestSegDataset(BaseDataset):
10 | def initialize(self, opt):
11 | self.opt = opt
12 | self.root = opt.dataroot
13 |
14 | self.dir_A = opt.test_CT_dir
15 | # self.dir_Seg = opt.test_CT_seg_dir
16 |
17 | self.A_paths = opt.imglist_testCT
18 |
19 | self.A_size = len(self.A_paths)
20 |
21 | if not self.opt.isTrain:
22 | self.skipcrop = True
23 | else:
24 | self.skipcrop = False
25 | # self.transform = get_transform(opt)
26 |
27 | if self.skipcrop:
28 | osize = [opt.fineSize, opt.fineSize]
29 | else:
30 | osize = [opt.loadSize, opt.loadSize]
31 | transform_list = []
32 | transform_list.append(transforms.Scale(osize, Image.BICUBIC))
33 | self.transforms_scale = transforms.Compose(transform_list)
34 |
35 | transform_list = []
36 | transform_list.append(transforms.Scale(osize, Image.NEAREST))
37 | self.transforms_seg_scale = transforms.Compose(transform_list)
38 |
39 | transform_list = []
40 | transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize))
41 | self.transforms_crop = transforms.Compose(transform_list)
42 |
43 | transform_list = []
44 | transform_list.append(transforms.ToTensor())
45 | self.transforms_toTensor = transforms.Compose(transform_list)
46 |
47 | transform_list = []
48 | transform_list.append(transforms.Normalize((0.5, 0.5, 0.5),
49 | (0.5, 0.5, 0.5)))
50 | self.transforms_normalize = transforms.Compose(transform_list)
51 |
52 |
53 | def __getitem__(self, index):
54 | A_path = self.A_paths[index % self.A_size]
55 | # Seg_path = A_path.replace(self.dir_A,self.dir_Seg)
56 | # Seg_path = Seg_path.replace('_rawimg','_organlabel')
57 |
58 | A_img = Image.open(A_path).convert('L')
59 | # Seg_img = Image.open(Seg_path).convert('I')
60 |
61 | A_img = self.transforms_scale(A_img)
62 | # Seg_img = self.transforms_seg_scale(Seg_img)
63 |
64 | A_img = self.transforms_toTensor(A_img)
65 | # Seg_img = self.transforms_toTensor(Seg_img)
66 |
67 | A_img = self.transforms_normalize(A_img)
68 |
69 | #strategy 1
70 | # Seg_img[Seg_img == 6] = 4
71 | # Seg_img[Seg_img == 7] = 5
72 | # Seg_img[Seg_img == 14] = 6
73 | #
74 | # Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize)
75 | # Seg_imgs[0, :, :] = Seg_img == 0
76 | # Seg_imgs[1, :, :] = Seg_img == 1
77 | # Seg_imgs[2, :, :] = Seg_img == 2
78 | # Seg_imgs[3, :, :] = Seg_img == 3
79 | # Seg_imgs[4, :, :] = Seg_img == 4
80 | # Seg_imgs[5, :, :] = Seg_img == 5
81 | # Seg_imgs[6, :, :] = Seg_img == 6
82 |
83 | #strategy 2
84 | # Seg_img[Seg_img == 2] = 3
85 | # Seg_img[Seg_img == 14] = 3
86 | # Seg_img[Seg_img == 3] = 3
87 | # Seg_img[Seg_img == 4] = 3
88 | # Seg_img[Seg_img == 5] = 3
89 | # Seg_img[Seg_img == 7] = 3
90 | # Seg_img[Seg_img == 6] = 2
91 | #
92 | # Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize)
93 | # Seg_imgs[0, :, :] = Seg_img == 0
94 | # Seg_imgs[1, :, :] = Seg_img == 1
95 | # Seg_imgs[2, :, :] = Seg_img == 2
96 | # Seg_imgs[3, :, :] = Seg_img == 3
97 | Seg_imgs = 0
98 | Seg_path = ''
99 |
100 | return {'A': A_img, 'Seg': Seg_imgs,
101 | 'A_paths': A_path, 'Seg_paths':Seg_path}
102 |
103 | def __len__(self):
104 | return self.A_size
105 |
106 | def name(self):
107 | return 'TestCTDataset'
108 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/train.txt:
--------------------------------------------------------------------------------
1 | 5449
2 | 6140
3 | 5902
4 | 5543
5 | 6392
6 | 5425
7 | 5121
8 | 5506
9 | 5696
10 | 6239
11 | 6143
12 | 5485
13 | 5990
14 | 5322
15 | 6138
16 | 5986
17 | 5756
18 | 5323
19 | 5158
20 | 5921
21 | 5855
22 | 5478
23 | 5898
24 | 5415
25 | 6054
26 | 5161
27 | 5318
28 | 5218
29 | 5460
30 | 6056
31 | 6313
32 | 5595
33 | 5256
34 | 5353
35 | 5044
36 | 5177
37 | 6029
38 | 5980
39 | 5493
40 | 5528
41 | 5904
42 | 5895
43 | 5881
44 | 5275
45 | 5829
46 | 5426
47 | 6334
48 | 5548
49 | 5988
50 | 5714
51 | 5254
52 | 5309
53 | 5253
54 | 5255
55 | 5983
56 | 5752
57 | 5005
58 | 6240
59 | 5546
60 | 5695
61 | 5684
62 | 5751
63 | 6274
64 | 5882
65 | 5730
66 | 5495
67 | 5489
68 | 5749
69 | 6244
70 | 5599
71 | 5503
72 | 5319
73 | 5418
74 | 5454
75 | 5937
76 | 5416
77 | 5989
78 | 5505
79 | 6352
80 | 6237
81 | 6139
82 | 5901
83 | 5421
84 | 5498
85 | 5602
86 | 5083
87 | 5944
88 | 5456
89 | 6122
90 | 6333
91 | 5417
92 | 5981
93 | 5165
94 | 6417
95 | 5758
96 | 5527
97 | 5082
98 | 5805
99 | 5308
100 | 5828
101 | 5120
102 | 5214
103 | 5530
104 | 6026
105 | 5452
106 | 5008
107 | 5251
108 | 6047
109 | 6238
110 | 6008
111 | 5925
112 | 5873
113 | 6366
114 | 5156
115 | 5875
116 | 6311
117 | 6224
118 | 6169
119 | 5922
120 | 5877
121 | 5615
122 | 5896
123 | 5715
124 | 5890
125 | 6141
126 | 5179
127 | 5215
128 | 5685
129 | 6246
130 | 5641
131 | 5058
132 | 5807
133 | 5122
134 | 5423
135 | 5716
136 | 5652
137 | 5262
138 | 5978
139 | 5429
140 | 5542
141 | 5598
142 | 5984
143 | 5354
144 | 5261
145 | 6044
146 | 5003
147 | 5888
148 | 5422
149 | 5124
150 | 5219
151 | 6009
152 | 6087
153 | 5892
154 | 6168
155 | 5616
156 | 5754
157 | 5547
158 | 5393
159 | 5889
160 | 5750
161 | 5963
162 | 5500
163 | 5004
164 | 5303
165 | 6269
166 | 6243
167 | 5885
168 | 5019
169 | 5757
170 | 6267
171 | 5809
172 | 5321
173 | 5529
174 | 5643
175 | 5748
176 | 5501
177 | 6137
178 | 5213
179 | 5259
180 | 5596
181 | 5745
182 | 5653
183 | 6418
184 | 5507
185 | 5136
186 | 5453
187 | 6367
188 | 5544
189 | 6046
190 | 6271
191 | 5252
192 | 5488
193 | 5480
194 | 5080
195 | 5504
196 | 5274
197 | 5578
198 | 5920
199 | 5654
200 | 5924
201 | 5260
202 | 5394
203 | 6041
204 | 5263
205 | 6223
206 | 5642
207 | 6121
208 | 5497
209 | 5939
210 | 5491
211 | 5825
212 | 5753
213 | 5320
214 | 5487
215 | 6042
216 | 6270
217 | 5940
218 | 5157
219 | 5479
220 | 5496
221 | 5639
222 | 5392
223 | 6177
224 | 5614
225 | 5451
226 | 6312
227 | 6199
228 | 5667
229 | 5666
230 | 6198
231 | 5006
232 | 5427
233 | 5887
234 | 5755
235 | 6200
236 | 5461
237 | 6120
238 | 5982
239 | 6416
240 | 5277
241 | 5884
242 | 6142
243 | 6268
244 | 5880
245 | 6266
246 | 5166
247 | 5258
248 | 5420
249 | 5490
250 | 5135
251 | 5655
252 | 5391
253 | 5682
254 | 5853
255 | 5905
256 | 6045
257 | 5576
258 | 5827
259 | 5492
260 | 5943
261 | 5574
262 | 5307
263 | 5428
264 | 5874
265 | 6006
266 | 5458
267 | 5883
268 | 6030
269 | 5808
270 | 5964
271 | 5305
272 | 5159
273 | 5540
274 | 6178
275 | 6024
276 | 5484
277 | 5832
278 | 6031
279 | 5459
280 | 6028
281 | 5729
282 | 5601
283 | 6415
284 | 5483
285 | 5324
286 | 5894
287 | 5830
288 | 6025
289 | 5854
290 | 5164
291 | 6350
292 | 5903
293 | 6296
294 | 5600
295 | 5486
296 | 5007
297 | 6055
298 | 5747
299 | 5872
300 | 5856
301 | 5482
302 | 5424
303 | 5987
304 | 6222
305 | 5597
306 | 5876
307 | 5824
308 | 5178
309 | 6085
310 | 5979
311 | 6197
312 | 5985
313 | 5572
314 | 5899
315 | 5020
316 | 6241
317 | 5276
318 | 5938
319 | 5806
320 | 6272
321 | 6043
322 | 5502
323 | 5893
324 | 6105
325 | 5160
326 | 5886
327 | 6007
328 | 5923
329 | 5942
330 | 5665
331 | 6225
332 | 5577
333 | 5257
334 | 6273
335 | 5481
336 | 5162
337 | 5217
338 | 5457
339 | 6245
340 | 5879
341 | 6005
342 | 6309
343 | 5575
344 | 5494
345 | 5900
346 | 5216
347 | 5304
348 | 5499
349 | 5746
350 | 5545
351 | 5045
352 | 6236
353 | 5278
354 | 6242
355 | 5123
356 | 5450
357 | 5306
358 | 5419
359 | 5897
360 | 5831
361 | 6086
362 | 5891
363 | 5455
364 | 6351
365 | 5878
366 | 5826
367 | 5081
368 | 6420
369 | 6393
370 | 6040
371 | 5573
372 | 6310
373 | 5640
374 | 5936
375 | 5541
376 | 6221
377 | 5163
378 | 6027
379 | 5941
380 | 5683
381 | 6419
--------------------------------------------------------------------------------
/data/yh_seg_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import torchvision.transforms as transforms
3 | from data.base_dataset import BaseDataset, get_transform
4 | from PIL import Image
5 | import torch
6 | import random
7 | from data import random_crop_yh
8 |
9 | class yhSegDataset(BaseDataset):
10 | def initialize(self, opt):
11 | self.opt = opt
12 | self.root = opt.dataroot
13 | self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A')
14 | self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B')
15 |
16 | self.dir_A = opt.raw_MRI_dir
17 | self.dir_B = opt.raw_CT_dir
18 | self.dir_Seg = opt.raw_MRI_seg_dir
19 |
20 | self.A_paths = opt.imglist_MRI
21 | self.B_paths = opt.imglist_CT
22 |
23 | self.A_size = len(self.A_paths)
24 | self.B_size = len(self.B_paths)
25 | if not self.opt.isTrain:
26 | self.skipcrop = True
27 | else:
28 | self.skipcrop = False
29 | # self.transform = get_transform(opt)
30 |
31 | if self.skipcrop:
32 | osize = [opt.fineSize, opt.fineSize]
33 | else:
34 | osize = [opt.loadSize, opt.loadSize]
35 | transform_list = []
36 | transform_list.append(transforms.Scale(osize, Image.BICUBIC))
37 | self.transforms_scale = transforms.Compose(transform_list)
38 |
39 | transform_list = []
40 | transform_list.append(transforms.Scale(osize, Image.NEAREST))
41 | self.transforms_seg_scale = transforms.Compose(transform_list)
42 |
43 | transform_list = []
44 | transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize))
45 | self.transforms_crop = transforms.Compose(transform_list)
46 |
47 | transform_list = []
48 | transform_list.append(transforms.ToTensor())
49 | self.transforms_toTensor = transforms.Compose(transform_list)
50 |
51 | transform_list = []
52 | transform_list.append(transforms.Normalize((0.5, 0.5, 0.5),
53 | (0.5, 0.5, 0.5)))
54 | self.transforms_normalize = transforms.Compose(transform_list)
55 |
56 |
57 | def __getitem__(self, index):
58 | index_A = index % self.A_size
59 | A_path = self.A_paths[index_A]
60 | Seg_path = A_path.replace(self.dir_A,self.dir_Seg)
61 | Seg_path = Seg_path.replace('_rawimg','_organlabel')
62 |
63 | index_B = random.randint(0, self.B_size - 1)
64 | B_path = self.B_paths[index_B]
65 | A_img = Image.open(A_path).convert('L')
66 | Seg_img = Image.open(Seg_path).convert('I')
67 | B_img = Image.open(B_path).convert('L')
68 |
69 | A_img = self.transforms_scale(A_img)
70 | B_img = self.transforms_scale(B_img)
71 | Seg_img = self.transforms_seg_scale(Seg_img)
72 |
73 | if not self.skipcrop:
74 | [A_img,Seg_img] = self.transforms_crop([A_img, Seg_img])
75 | [B_img] = self.transforms_crop([B_img])
76 |
77 | A_img = self.transforms_toTensor(A_img)
78 | B_img = self.transforms_toTensor(B_img)
79 | Seg_img = self.transforms_toTensor(Seg_img)
80 |
81 | A_img = self.transforms_normalize(A_img)
82 | B_img = self.transforms_normalize(B_img)
83 |
84 | Seg_img[Seg_img == 6] = 4
85 | Seg_img[Seg_img == 7] = 5
86 | Seg_img[Seg_img == 14] = 6
87 |
88 | Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize)
89 | Seg_imgs[0, :, :] = Seg_img == 0
90 | Seg_imgs[1, :, :] = Seg_img == 1
91 | Seg_imgs[2, :, :] = Seg_img == 2
92 | Seg_imgs[3, :, :] = Seg_img == 3
93 | Seg_imgs[4, :, :] = Seg_img == 4
94 | Seg_imgs[5, :, :] = Seg_img == 5
95 | Seg_imgs[6, :, :] = Seg_img == 6
96 |
97 | return {'A': A_img, 'B': B_img, 'Seg': Seg_imgs, 'Seg_one': Seg_img,
98 | 'A_paths': A_path, 'B_paths': B_path, 'Seg_paths':Seg_path}
99 |
100 |
101 | def __len__(self):
102 | return max(self.A_size, self.B_size)
103 |
104 | def name(self):
105 | return 'yhSegDataset'
106 |
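
The organ-label remapping and the seven hard-coded mask channels built near the end of __getitem__ can be factored into one helper driven by opt.output_nc_seg; a minimal sketch (the helper name is ours, and a 1 x H x W integer label tensor like Seg_img is assumed):

    import torch

    def remap_and_one_hot(seg, num_classes=7):
        # collapse raw organ ids 6/7/14 to contiguous labels 4/5/6, as __getitem__ does
        seg = seg.clone()
        seg[seg == 6] = 4
        seg[seg == 7] = 5
        seg[seg == 14] = 6
        # expand the 1 x H x W label map into a num_classes x H x W stack of 0/1 masks
        onehot = torch.zeros(num_classes, seg.size(1), seg.size(2))
        for c in range(num_classes):
            onehot[c] = (seg[0] == c).float()
        return onehot, seg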
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split):
15 | n = caffe.NetSpec()
16 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
17 | seed=1337)
18 | if split == 'train':
19 | pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
20 | pylayer = 'SBDDSegDataLayer'
21 | else:
22 | pydata_params['voc_dir'] = '../data/pascal/VOC2011'
23 | pylayer = 'VOCSegDataLayer'
24 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
25 | ntop=2, param_str=str(pydata_params))
26 |
27 | # the base net
28 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
29 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
30 | n.pool1 = max_pool(n.relu1_2)
31 |
32 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
33 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
34 | n.pool2 = max_pool(n.relu2_2)
35 |
36 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
37 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
38 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
39 | n.pool3 = max_pool(n.relu3_3)
40 |
41 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
42 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
43 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
44 | n.pool4 = max_pool(n.relu4_3)
45 |
46 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
47 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
48 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
49 | n.pool5 = max_pool(n.relu5_3)
50 |
51 | # fully conv
52 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
53 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
54 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
55 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
56 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
57 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
58 | n.upscore2 = L.Deconvolution(n.score_fr,
59 | convolution_param=dict(num_output=21, kernel_size=4, stride=2,
60 | bias_term=False),
61 | param=[dict(lr_mult=0)])
62 |
63 | n.score_pool4 = L.Convolution(n.pool4, num_output=21, kernel_size=1, pad=0,
64 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
65 | n.score_pool4c = crop(n.score_pool4, n.upscore2)
66 | n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
67 | operation=P.Eltwise.SUM)
68 | n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
69 | convolution_param=dict(num_output=21, kernel_size=4, stride=2,
70 | bias_term=False),
71 | param=[dict(lr_mult=0)])
72 |
73 | n.score_pool3 = L.Convolution(n.pool3, num_output=21, kernel_size=1, pad=0,
74 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
75 | n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
76 | n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
77 | operation=P.Eltwise.SUM)
78 | n.upscore8 = L.Deconvolution(n.fuse_pool3,
79 | convolution_param=dict(num_output=21, kernel_size=16, stride=8,
80 | bias_term=False),
81 | param=[dict(lr_mult=0)])
82 |
83 | n.score = crop(n.upscore8, n.data)
84 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
85 | loss_param=dict(normalize=False, ignore_label=255))
86 |
87 | return n.to_proto()
88 |
89 | def make_net():
90 | with open('train.prototxt', 'w') as f:
91 | f.write(str(fcn('train')))
92 |
93 | with open('val.prototxt', 'w') as f:
94 | f.write(str(fcn('seg11valid')))
95 |
96 | if __name__ == '__main__':
97 | make_net()
98 |
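
The decoder stacks two kernel_size=4, stride=2 deconvolutions and one kernel_size=16, stride=8 deconvolution, a combined x32 upsampling that undoes the five pooling stages, and crop() trims each upsampled map against its skip connection (and finally against n.data). The sizes are easy to check with the Caffe deconvolution formula; a small sketch, not part of the repo:

    def deconv_out(in_size, kernel, stride, pad=0):
        # Caffe deconvolution output size: stride * (in - 1) + kernel - 2 * pad
        return stride * (in_size - 1) + kernel - 2 * pad

    # for a 500x500 input, the pad=100 trick leaves a 16x16 score_fr map
    print(deconv_out(16, kernel=4, stride=2))    # 34, fused with the cropped score_pool4
    print(deconv_out(34, kernel=4, stride=2))    # 70, fused with the cropped score_pool3
    print(deconv_out(70, kernel=16, stride=8))   # 568, trimmed back to 500 by crop(n.upscore8, n.data)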
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def modality_fcn(net_spec, data, modality):
15 | n = net_spec
16 | # the base net
17 | n['conv1_1' + modality], n['relu1_1' + modality] = conv_relu(n[data], 64,
18 | pad=100)
19 | n['conv1_2' + modality], n['relu1_2' + modality] = conv_relu(n['relu1_1' +
20 | modality], 64)
21 | n['pool1' + modality] = max_pool(n['relu1_2' + modality])
22 |
23 | n['conv2_1' + modality], n['relu2_1' + modality] = conv_relu(n['pool1' +
24 | modality], 128)
25 | n['conv2_2' + modality], n['relu2_2' + modality] = conv_relu(n['relu2_1' +
26 | modality], 128)
27 | n['pool2' + modality] = max_pool(n['relu2_2' + modality])
28 |
29 | n['conv3_1' + modality], n['relu3_1' + modality] = conv_relu(n['pool2' +
30 | modality], 256)
31 | n['conv3_2' + modality], n['relu3_2' + modality] = conv_relu(n['relu3_1' +
32 | modality], 256)
33 | n['conv3_3' + modality], n['relu3_3' + modality] = conv_relu(n['relu3_2' +
34 | modality], 256)
35 | n['pool3' + modality] = max_pool(n['relu3_3' + modality])
36 |
37 | n['conv4_1' + modality], n['relu4_1' + modality] = conv_relu(n['pool3' +
38 | modality], 512)
39 | n['conv4_2' + modality], n['relu4_2' + modality] = conv_relu(n['relu4_1' +
40 | modality], 512)
41 | n['conv4_3' + modality], n['relu4_3' + modality] = conv_relu(n['relu4_2' +
42 | modality], 512)
43 | n['pool4' + modality] = max_pool(n['relu4_3' + modality])
44 |
45 | n['conv5_1' + modality], n['relu5_1' + modality] = conv_relu(n['pool4' +
46 | modality], 512)
47 | n['conv5_2' + modality], n['relu5_2' + modality] = conv_relu(n['relu5_1' +
48 | modality], 512)
49 | n['conv5_3' + modality], n['relu5_3' + modality] = conv_relu(n['relu5_2' +
50 | modality], 512)
51 | n['pool5' + modality] = max_pool(n['relu5_3' + modality])
52 |
53 | # fully conv
54 | n['fc6' + modality], n['relu6' + modality] = conv_relu(
55 | n['pool5' + modality], 4096, ks=7, pad=0)
56 | n['drop6' + modality] = L.Dropout(
57 | n['relu6' + modality], dropout_ratio=0.5, in_place=True)
58 | n['fc7' + modality], n['relu7' + modality] = conv_relu(
59 | n['drop6' + modality], 4096, ks=1, pad=0)
60 | n['drop7' + modality] = L.Dropout(
61 | n['relu7' + modality], dropout_ratio=0.5, in_place=True)
62 | n['score_fr' + modality] = L.Convolution(
63 | n['drop7' + modality], num_output=40, kernel_size=1, pad=0,
64 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
65 | return n
66 |
67 | def fcn(split, tops):
68 | n = caffe.NetSpec()
69 | n.color, n.hha, n.label = L.Python(module='nyud_layers',
70 | layer='NYUDSegDataLayer', ntop=3,
71 | param_str=str(dict(nyud_dir='../data/nyud', split=split,
72 | tops=tops, seed=1337)))
73 | n = modality_fcn(n, 'color', 'color')
74 | n = modality_fcn(n, 'hha', 'hha')
75 | n.score_fused = L.Eltwise(n.score_frcolor, n.score_frhha,
76 | operation=P.Eltwise.SUM, coeff=[0.5, 0.5])
77 | n.upscore = L.Deconvolution(n.score_fused,
78 | convolution_param=dict(num_output=40, kernel_size=64, stride=32,
79 | bias_term=False),
80 | param=[dict(lr_mult=0)])
81 | n.score = crop(n.upscore, n.color)
82 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
83 | loss_param=dict(normalize=False, ignore_label=255))
84 | return n.to_proto()
85 |
86 | def make_net():
87 | tops = ['color', 'hha', 'label']
88 | with open('trainval.prototxt', 'w') as f:
89 | f.write(str(fcn('trainval', tops)))
90 |
91 | with open('test.prototxt', 'w') as f:
92 | f.write(str(fcn('test', tops)))
93 |
94 | if __name__ == '__main__':
95 | make_net()
96 |
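
fcn() builds one VGG trunk per modality with modality_fcn and fuses the two 40-class score maps with an equally weighted Eltwise sum before a single x32 upsampling. The same late-fusion pattern extends to more streams; a hypothetical sketch reusing the helpers above (a 'depth' top and matching nyud_layers support are assumptions, not part of this repo):

    def fcn_three_stream(split, tops):
        n = caffe.NetSpec()
        n.color, n.hha, n.depth, n.label = L.Python(module='nyud_layers',
                layer='NYUDSegDataLayer', ntop=4,
                param_str=str(dict(nyud_dir='../data/nyud', split=split,
                        tops=tops, seed=1337)))
        for m in ('color', 'hha', 'depth'):
            n = modality_fcn(n, m, m)
        # average the three per-class score maps instead of two
        n.score_fused = L.Eltwise(n.score_frcolor, n.score_frhha, n.score_frdepth,
                operation=P.Eltwise.SUM, coeff=[1. / 3, 1. / 3, 1. / 3])
        n.upscore = L.Deconvolution(n.score_fused,
                convolution_param=dict(num_output=40, kernel_size=64, stride=32,
                        bias_term=False),
                param=[dict(lr_mult=0)])
        n.score = crop(n.upscore, n.color)
        n.loss = L.SoftmaxWithLoss(n.score, n.label,
                loss_param=dict(normalize=False, ignore_label=255))
        return n.to_proto()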
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/val.txt:
--------------------------------------------------------------------------------
1 | 5010
2 | 5011
3 | 5012
4 | 5013
5 | 5022
6 | 5023
7 | 5024
8 | 5025
9 | 5026
10 | 5027
11 | 5048
12 | 5049
13 | 5050
14 | 5051
15 | 5052
16 | 5053
17 | 5054
18 | 5055
19 | 5064
20 | 5065
21 | 5066
22 | 5067
23 | 5068
24 | 5069
25 | 5070
26 | 5071
27 | 5072
28 | 5073
29 | 5074
30 | 5075
31 | 5092
32 | 5093
33 | 5094
34 | 5095
35 | 5096
36 | 5097
37 | 5098
38 | 5099
39 | 5100
40 | 5101
41 | 5102
42 | 5103
43 | 5104
44 | 5105
45 | 5106
46 | 5107
47 | 5108
48 | 5109
49 | 5110
50 | 5111
51 | 5112
52 | 5113
53 | 5114
54 | 5115
55 | 5116
56 | 5130
57 | 5138
58 | 5139
59 | 5140
60 | 5141
61 | 5142
62 | 5143
63 | 5144
64 | 5145
65 | 5146
66 | 5147
67 | 5148
68 | 5149
69 | 5150
70 | 5151
71 | 5152
72 | 5170
73 | 5203
74 | 5204
75 | 5205
76 | 5206
77 | 5223
78 | 5224
79 | 5225
80 | 5226
81 | 5227
82 | 5228
83 | 5229
84 | 5230
85 | 5231
86 | 5232
87 | 5233
88 | 5234
89 | 5235
90 | 5236
91 | 5237
92 | 5238
93 | 5239
94 | 5240
95 | 5241
96 | 5242
97 | 5243
98 | 5244
99 | 5245
100 | 5246
101 | 5247
102 | 5248
103 | 5249
104 | 5265
105 | 5266
106 | 5267
107 | 5268
108 | 5269
109 | 5270
110 | 5286
111 | 5287
112 | 5288
113 | 5289
114 | 5290
115 | 5291
116 | 5292
117 | 5293
118 | 5294
119 | 5295
120 | 5313
121 | 5314
122 | 5336
123 | 5337
124 | 5338
125 | 5339
126 | 5340
127 | 5341
128 | 5342
129 | 5343
130 | 5344
131 | 5345
132 | 5346
133 | 5347
134 | 5348
135 | 5349
136 | 5350
137 | 5365
138 | 5366
139 | 5367
140 | 5368
141 | 5369
142 | 5370
143 | 5371
144 | 5372
145 | 5373
146 | 5374
147 | 5375
148 | 5376
149 | 5377
150 | 5378
151 | 5379
152 | 5380
153 | 5381
154 | 5382
155 | 5383
156 | 5398
157 | 5399
158 | 5400
159 | 5401
160 | 5402
161 | 5403
162 | 5404
163 | 5405
164 | 5406
165 | 5407
166 | 5408
167 | 5409
168 | 5410
169 | 5436
170 | 5437
171 | 5438
172 | 5439
173 | 5440
174 | 5467
175 | 5468
176 | 5514
177 | 5534
178 | 5535
179 | 5536
180 | 5552
181 | 5553
182 | 5554
183 | 5584
184 | 5585
185 | 5586
186 | 5587
187 | 5588
188 | 5589
189 | 5590
190 | 5608
191 | 5609
192 | 5610
193 | 5611
194 | 5622
195 | 5623
196 | 5624
197 | 5625
198 | 5626
199 | 5627
200 | 5628
201 | 5629
202 | 5630
203 | 5631
204 | 5632
205 | 5646
206 | 5647
207 | 5648
208 | 5649
209 | 5659
210 | 5660
211 | 5661
212 | 5662
213 | 5674
214 | 5675
215 | 5691
216 | 5692
217 | 5700
218 | 5701
219 | 5702
220 | 5703
221 | 5704
222 | 5705
223 | 5719
224 | 5720
225 | 5721
226 | 5722
227 | 5723
228 | 5735
229 | 5736
230 | 5737
231 | 5738
232 | 5739
233 | 5740
234 | 5741
235 | 5742
236 | 5788
237 | 5789
238 | 5790
239 | 5791
240 | 5792
241 | 5793
242 | 5794
243 | 5795
244 | 5796
245 | 5797
246 | 5798
247 | 5799
248 | 5815
249 | 5816
250 | 5817
251 | 5818
252 | 5819
253 | 5820
254 | 5847
255 | 5848
256 | 5849
257 | 5863
258 | 5864
259 | 5865
260 | 5866
261 | 5867
262 | 5868
263 | 5909
264 | 5910
265 | 5911
266 | 5912
267 | 5913
268 | 5914
269 | 5915
270 | 5916
271 | 5929
272 | 5930
273 | 5931
274 | 5948
275 | 5949
276 | 5950
277 | 5951
278 | 5952
279 | 5953
280 | 5954
281 | 5955
282 | 5956
283 | 5957
284 | 5958
285 | 5968
286 | 5969
287 | 5996
288 | 5997
289 | 5998
290 | 5999
291 | 6000
292 | 6013
293 | 6014
294 | 6015
295 | 6016
296 | 6017
297 | 6018
298 | 6019
299 | 6020
300 | 6035
301 | 6036
302 | 6037
303 | 6050
304 | 6051
305 | 6059
306 | 6060
307 | 6061
308 | 6062
309 | 6063
310 | 6064
311 | 6065
312 | 6066
313 | 6067
314 | 6068
315 | 6069
316 | 6070
317 | 6071
318 | 6072
319 | 6073
320 | 6074
321 | 6097
322 | 6110
323 | 6111
324 | 6112
325 | 6113
326 | 6114
327 | 6115
328 | 6116
329 | 6132
330 | 6133
331 | 6134
332 | 6159
333 | 6160
334 | 6161
335 | 6172
336 | 6173
337 | 6185
338 | 6186
339 | 6187
340 | 6188
341 | 6189
342 | 6190
343 | 6191
344 | 6213
345 | 6214
346 | 6215
347 | 6231
348 | 6232
349 | 6251
350 | 6252
351 | 6253
352 | 6281
353 | 6282
354 | 6283
355 | 6284
356 | 6300
357 | 6301
358 | 6316
359 | 6317
360 | 6318
361 | 6319
362 | 6320
363 | 6321
364 | 6322
365 | 6323
366 | 6324
367 | 6325
368 | 6326
369 | 6327
370 | 6328
371 | 6341
372 | 6342
373 | 6343
374 | 6344
375 | 6345
376 | 6346
377 | 6357
378 | 6358
379 | 6359
380 | 6360
381 | 6361
382 | 6362
383 | 6363
384 | 6370
385 | 6371
386 | 6372
387 | 6373
388 | 6374
389 | 6375
390 | 6376
391 | 6377
392 | 6378
393 | 6379
394 | 6380
395 | 6381
396 | 6382
397 | 6383
398 | 6402
399 | 6403
400 | 6404
401 | 6405
402 | 6406
403 | 6425
404 | 6426
405 | 6427
406 | 6428
407 | 6429
408 | 6434
409 | 6435
410 | 6436
411 | 6437
412 | 6438
413 | 6439
414 | 6440
415 |
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/test.txt:
--------------------------------------------------------------------------------
1 | coast_natu975
2 | insidecity_art947
3 | insidecity_urb781
4 | highway_bost374
5 | coast_n203085
6 | insidecity_a223049
7 | mountain_nat116
8 | street_art861
9 | mountain_land188
10 | street_par177
11 | opencountry_natu524
12 | forest_natu29
13 | highway_gre37
14 | street_bost77
15 | insidecity_art1125
16 | street_urb521
17 | highway_bost178
18 | street_art760
19 | street_urb885
20 | insidecity_art829
21 | coast_natu804
22 | mountain_sharp44
23 | coast_natu649
24 | opencountry_land691
25 | insidecity_hous35
26 | tallbuilding_art1719
27 | mountain_n736026
28 | mountain_moun41
29 | insidecity_urban992
30 | opencountry_land295
31 | tallbuilding_art527
32 | highway_art238
33 | forest_for114
34 | coast_land296
35 | tallbuilding_sky7
36 | mountain_n44009
37 | tallbuilding_art1316
38 | forest_nat717
39 | highway_bost164
40 | street_par29
41 | forest_natc52
42 | tallbuilding_art1004
43 | coast_sun14
44 | opencountry_land206
45 | opencountry_land364
46 | mountain_n219015
47 | highway_a836030
48 | forest_nat324
49 | opencountry_land493
50 | insidecity_art1598
51 | street_street27
52 | insidecity_a48009
53 | coast_cdmc889
54 | street_gre295
55 | tallbuilding_a538076
56 | street_boston378
57 | highway_urb759
58 | street_par151
59 | tallbuilding_urban1003
60 | tallbuilding_urban16
61 | highway_bost151
62 | opencountry_nat965
63 | highway_gre661
64 | forest_for42
65 | opencountry_n18002
66 | insidecity_art646
67 | highway_gre55
68 | coast_n295051
69 | forest_bost103
70 | highway_n480036
71 | mountain_land4
72 | forest_nat130
73 | coast_nat643
74 | insidecity_urb250
75 | street_gre11
76 | street_boston271
77 | opencountry_n490003
78 | mountain_nat762
79 | street_par86
80 | coast_arnat59
81 | mountain_land787
82 | highway_gre472
83 | opencountry_tell67
84 | mountain_sharp66
85 | opencountry_land534
86 | insidecity_gre290
87 | highway_bost307
88 | opencountry_n213059
89 | forest_nat220
90 | forest_cdmc348
91 | tallbuilding_art900
92 | insidecity_art569
93 | street_urb200
94 | coast_natu468
95 | coast_n672069
96 | insidecity_hous109
97 | forest_land862
98 | opencountry_natu65
99 | tallbuilding_a805096
100 | opencountry_n291058
101 | forest_natu439
102 | coast_nat799
103 | tallbuilding_urban991
104 | tallbuilding_sky17
105 | opencountry_land638
106 | opencountry_natu563
107 | tallbuilding_urb733
108 | forest_cdmc451
109 | mountain_n371066
110 | mountain_n213081
111 | mountain_nat57
112 | tallbuilding_a463068
113 | forest_natu848
114 | tallbuilding_art306
115 | insidecity_boston92
116 | insidecity_urb584
117 | tallbuilding_urban1126
118 | coast_n286045
119 | street_gre179
120 | coast_nat1091
121 | opencountry_nat615
122 | coast_nat901
123 | forest_cdmc291
124 | mountain_natu568
125 | mountain_n18070
126 | street_bost136
127 | tallbuilding_art425
128 | coast_bea3
129 | tallbuilding_art1616
130 | insidecity_art690
131 | highway_gre492
132 | highway_bost320
133 | forest_nat400
134 | highway_par23
135 | tallbuilding_a212033
136 | forest_natu994
137 | tallbuilding_archi296
138 | highway_gre413
139 | tallbuilding_a279033
140 | insidecity_art1277
141 | coast_cdmc948
142 | forest_for15
143 | street_par68
144 | mountain_natu786
145 | opencountry_open61
146 | opencountry_nat423
147 | mountain_land143
148 | tallbuilding_a487066
149 | tallbuilding_art1751
150 | insidecity_hous79
151 | street_par118
152 | highway_bost293
153 | mountain_n213021
154 | opencountry_nat802
155 | coast_n384099
156 | opencountry_natu998
157 | mountain_n344042
158 | coast_nat1265
159 | forest_text44
160 | forest_for84
161 | insidecity_a807066
162 | opencountry_nat1117
163 | coast_sun42
164 | insidecity_par180
165 | opencountry_land923
166 | highway_art580
167 | street_art1328
168 | coast_cdmc838
169 | opencountry_land660
170 | opencountry_cdmc354
171 | coast_natu825
172 | opencountry_natu38
173 | mountain_nat30
174 | coast_n199066
175 | forest_text124
176 | forest_land222
177 | tallbuilding_city56
178 | tallbuilding_city22
179 | opencountry_fie36
180 | mountain_ski24
181 | coast_cdmc997
182 | insidecity_boston232
183 | opencountry_land575
184 | opencountry_land797
185 | insidecity_urb362
186 | forest_nat1033
187 | mountain_nat891
188 | street_hexp3
189 | tallbuilding_art1474
190 | tallbuilding_urban73
191 | opencountry_natu852
192 | mountain_nat1008
193 | coast_nat294
194 | mountain_sharp20
195 | opencountry_fie14
196 | mountain_land275
197 | forest_land760
198 | coast_land374
199 | mountain_nat426
200 | highway_gre141
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split):
15 | n = caffe.NetSpec()
16 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
17 | seed=1337)
18 | if split == 'train':
19 | pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
20 | pylayer = 'SBDDSegDataLayer'
21 | else:
22 | pydata_params['voc_dir'] = '../data/pascal/VOC2011'
23 | pylayer = 'VOCSegDataLayer'
24 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
25 | ntop=2, param_str=str(pydata_params))
26 |
27 | # the base net
28 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
29 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
30 | n.pool1 = max_pool(n.relu1_2)
31 |
32 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
33 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
34 | n.pool2 = max_pool(n.relu2_2)
35 |
36 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
37 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
38 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
39 | n.pool3 = max_pool(n.relu3_3)
40 |
41 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
42 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
43 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
44 | n.pool4 = max_pool(n.relu4_3)
45 |
46 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
47 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
48 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
49 | n.pool5 = max_pool(n.relu5_3)
50 |
51 | # fully conv
52 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
53 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
54 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
55 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
56 |
57 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
58 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
59 | n.upscore2 = L.Deconvolution(n.score_fr,
60 | convolution_param=dict(num_output=21, kernel_size=4, stride=2,
61 | bias_term=False),
62 | param=[dict(lr_mult=0)])
63 |
64 | # scale pool4 skip for compatibility
65 | n.scale_pool4 = L.Scale(n.pool4, filler=dict(type='constant',
66 | value=0.01), param=[dict(lr_mult=0)])
67 | n.score_pool4 = L.Convolution(n.scale_pool4, num_output=21, kernel_size=1, pad=0,
68 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
69 | n.score_pool4c = crop(n.score_pool4, n.upscore2)
70 | n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
71 | operation=P.Eltwise.SUM)
72 | n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
73 | convolution_param=dict(num_output=21, kernel_size=4, stride=2,
74 | bias_term=False),
75 | param=[dict(lr_mult=0)])
76 |
77 | # scale pool3 skip for compatibility
78 | n.scale_pool3 = L.Scale(n.pool3, filler=dict(type='constant',
79 | value=0.0001), param=[dict(lr_mult=0)])
80 | n.score_pool3 = L.Convolution(n.scale_pool3, num_output=21, kernel_size=1, pad=0,
81 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
82 | n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
83 | n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
84 | operation=P.Eltwise.SUM)
85 | n.upscore8 = L.Deconvolution(n.fuse_pool3,
86 | convolution_param=dict(num_output=21, kernel_size=16, stride=8,
87 | bias_term=False),
88 | param=[dict(lr_mult=0)])
89 |
90 | n.score = crop(n.upscore8, n.data)
91 | n.loss = L.SoftmaxWithLoss(n.score, n.label,
92 | loss_param=dict(normalize=False, ignore_label=255))
93 |
94 | return n.to_proto()
95 |
96 | def make_net():
97 | with open('train.prototxt', 'w') as f:
98 | f.write(str(fcn('train')))
99 |
100 | with open('val.prototxt', 'w') as f:
101 | f.write(str(fcn('seg11valid')))
102 |
103 | if __name__ == '__main__':
104 | make_net()
105 |
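
Every Deconvolution above is frozen (param=[dict(lr_mult=0)]); in this codebase such layers are typically set to fixed bilinear interpolation filters by the accompanying solve.py rather than learned. A NumPy sketch of that kind of bilinear kernel (the helper name is ours):

    import numpy as np

    def bilinear_kernel(size):
        # (size x size) bilinear interpolation filter, peaking at the center
        factor = (size + 1) // 2
        center = factor - 1 if size % 2 == 1 else factor - 0.5
        og = np.ogrid[:size, :size]
        return ((1 - abs(og[0] - center) / float(factor)) *
                (1 - abs(og[1] - center) / float(factor)))

    print(bilinear_kernel(4))    # the filter a kernel_size=4, stride=2 deconv would carry per class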
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/net.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | from caffe import layers as L, params as P
3 | from caffe.coord_map import crop
4 |
5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
7 | num_output=nout, pad=pad,
8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
9 | return conv, L.ReLU(conv, in_place=True)
10 |
11 | def max_pool(bottom, ks=2, stride=2):
12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
13 |
14 | def fcn(split):
15 | n = caffe.NetSpec()
16 | n.data, n.sem, n.geo = L.Python(module='siftflow_layers',
17 | layer='SIFTFlowSegDataLayer', ntop=3,
18 | param_str=str(dict(siftflow_dir='../data/sift-flow',
19 | split=split, seed=1337)))
20 |
21 | # the base net
22 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
23 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
24 | n.pool1 = max_pool(n.relu1_2)
25 |
26 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
27 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
28 | n.pool2 = max_pool(n.relu2_2)
29 |
30 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
31 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
32 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
33 | n.pool3 = max_pool(n.relu3_3)
34 |
35 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
36 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
37 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
38 | n.pool4 = max_pool(n.relu4_3)
39 |
40 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
41 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
42 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
43 | n.pool5 = max_pool(n.relu5_3)
44 |
45 | # fully conv
46 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
47 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
48 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
49 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
50 |
51 | n.score_fr_sem = L.Convolution(n.drop7, num_output=33, kernel_size=1, pad=0,
52 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
53 | n.upscore2_sem = L.Deconvolution(n.score_fr_sem,
54 | convolution_param=dict(num_output=33, kernel_size=4, stride=2,
55 | bias_term=False),
56 | param=[dict(lr_mult=0)])
57 |
58 | n.score_pool4_sem = L.Convolution(n.pool4, num_output=33, kernel_size=1, pad=0,
59 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
60 | n.score_pool4_semc = crop(n.score_pool4_sem, n.upscore2_sem)
61 | n.fuse_pool4_sem = L.Eltwise(n.upscore2_sem, n.score_pool4_semc,
62 | operation=P.Eltwise.SUM)
63 | n.upscore16_sem = L.Deconvolution(n.fuse_pool4_sem,
64 | convolution_param=dict(num_output=33, kernel_size=32, stride=16,
65 | bias_term=False),
66 | param=[dict(lr_mult=0)])
67 |
68 | n.score_sem = crop(n.upscore16_sem, n.data)
69 | # loss to make score happy (o.w. loss_sem)
70 | n.loss = L.SoftmaxWithLoss(n.score_sem, n.sem,
71 | loss_param=dict(normalize=False, ignore_label=255))
72 |
73 | n.score_fr_geo = L.Convolution(n.drop7, num_output=3, kernel_size=1, pad=0,
74 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
75 |
76 | n.upscore2_geo = L.Deconvolution(n.score_fr_geo,
77 | convolution_param=dict(num_output=3, kernel_size=4, stride=2,
78 | bias_term=False),
79 | param=[dict(lr_mult=0)])
80 |
81 | n.score_pool4_geo = L.Convolution(n.pool4, num_output=3, kernel_size=1, pad=0,
82 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
83 | n.score_pool4_geoc = crop(n.score_pool4_geo, n.upscore2_geo)
84 | n.fuse_pool4_geo = L.Eltwise(n.upscore2_geo, n.score_pool4_geoc,
85 | operation=P.Eltwise.SUM)
86 | n.upscore16_geo = L.Deconvolution(n.fuse_pool4_geo,
87 | convolution_param=dict(num_output=3, kernel_size=32, stride=16,
88 | bias_term=False),
89 | param=[dict(lr_mult=0)])
90 |
91 | n.score_geo = crop(n.upscore16_geo, n.data)
92 | n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo,
93 | loss_param=dict(normalize=False, ignore_label=255))
94 |
95 | return n.to_proto()
96 |
97 | def make_net():
98 | with open('trainval.prototxt', 'w') as f:
99 | f.write(str(fcn('trainval')))
100 |
101 | with open('test.prototxt', 'w') as f:
102 | f.write(str(fcn('test')))
103 |
104 | if __name__ == '__main__':
105 | make_net()
106 |
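
Two heads share the trunk here: the 33-class semantic head, whose loss keeps the plain name loss so existing solver and scoring scripts still find it (as the inline comment notes), and the 3-class geometric head with loss_geo. Caffe sums every loss layer with a default weight of 1; if the geometric task should count for less, the layer can be built with an explicit loss_weight, e.g. as a drop-in replacement for the n.loss_geo line inside fcn() above (a hedged sketch, not what this file does):

    # hypothetical: give the geometric head half the weight of the semantic head
    n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo, loss_weight=0.5,
            loss_param=dict(normalize=False, ignore_label=255))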
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/siftflow_layers.py:
--------------------------------------------------------------------------------
1 | import caffe
2 |
3 | import numpy as np
4 | from PIL import Image
5 | import scipy.io
6 |
7 | import random
8 |
9 | class SIFTFlowSegDataLayer(caffe.Layer):
10 | """
11 | Load (input image, label image) pairs from SIFT Flow
12 | one-at-a-time while reshaping the net to preserve dimensions.
13 |
14 | This data layer has three tops:
15 |
16 | 1. the data, pre-processed
17 | 2. the semantic labels 0-32 and void 255
18 | 3. the geometric labels 0-2 and void 255
19 |
20 | Use this to feed data to a fully convolutional network.
21 | """
22 |
23 | def setup(self, bottom, top):
24 | """
25 | Setup data layer according to parameters:
26 |
27 | - siftflow_dir: path to SIFT Flow dir
28 | - split: train / val / test
29 | - randomize: load in random order (default: True)
30 | - seed: seed for randomization (default: None / current time)
31 |
32 | for semantic segmentation of object and geometric classes.
33 |
34 | example: params = dict(siftflow_dir="/path/to/siftflow", split="val")
35 | """
36 | # config
37 | params = eval(self.param_str)
38 | self.siftflow_dir = params['siftflow_dir']
39 | self.split = params['split']
40 | self.mean = np.array((114.578, 115.294, 108.353), dtype=np.float32)
41 | self.random = params.get('randomize', True)
42 | self.seed = params.get('seed', None)
43 |
44 | # three tops: data, semantic, geometric
45 | if len(top) != 3:
46 | raise Exception("Need to define three tops: data, semantic label, and geometric label.")
47 | # data layers have no bottoms
48 | if len(bottom) != 0:
49 | raise Exception("Do not define a bottom.")
50 |
51 | # load indices for images and labels
52 | split_f = '{}/{}.txt'.format(self.siftflow_dir, self.split)
53 | self.indices = open(split_f, 'r').read().splitlines()
54 | self.idx = 0
55 |
56 | # make eval deterministic
57 | if 'train' not in self.split:
58 | self.random = False
59 |
60 | # randomization: seed and pick
61 | if self.random:
62 | random.seed(self.seed)
63 | self.idx = random.randint(0, len(self.indices)-1)
64 |
65 | def reshape(self, bottom, top):
66 | # load image + label image pair
67 | self.data = self.load_image(self.indices[self.idx])
68 | self.label_semantic = self.load_label(self.indices[self.idx], label_type='semantic')
69 | self.label_geometric = self.load_label(self.indices[self.idx], label_type='geometric')
70 | # reshape tops to fit (leading 1 is for batch dimension)
71 | top[0].reshape(1, *self.data.shape)
72 | top[1].reshape(1, *self.label_semantic.shape)
73 | top[2].reshape(1, *self.label_geometric.shape)
74 |
75 | def forward(self, bottom, top):
76 | # assign output
77 | top[0].data[...] = self.data
78 | top[1].data[...] = self.label_semantic
79 | top[2].data[...] = self.label_geometric
80 |
81 | # pick next input
82 | if self.random:
83 | self.idx = random.randint(0, len(self.indices)-1)
84 | else:
85 | self.idx += 1
86 | if self.idx == len(self.indices):
87 | self.idx = 0
88 |
89 | def backward(self, top, propagate_down, bottom):
90 | pass
91 |
92 | def load_image(self, idx):
93 | """
94 | Load input image and preprocess for Caffe:
95 | - cast to float
96 | - switch channels RGB -> BGR
97 | - subtract mean
98 | - transpose to channel x height x width order
99 | """
100 | im = Image.open('{}/Images/spatial_envelope_256x256_static_8outdoorcategories/{}.jpg'.format(self.siftflow_dir, idx))
101 | in_ = np.array(im, dtype=np.float32)
102 | in_ = in_[:,:,::-1]
103 | in_ -= self.mean
104 | in_ = in_.transpose((2,0,1))
105 | return in_
106 |
107 | def load_label(self, idx, label_type=None):
108 | """
109 | Load label image as 1 x height x width integer array of label indices.
110 | The leading singleton dimension is required by the loss.
111 | """
112 | if label_type == 'semantic':
113 | label = scipy.io.loadmat('{}/SemanticLabels/spatial_envelope_256x256_static_8outdoorcategories/{}.mat'.format(self.siftflow_dir, idx))['S']
114 | elif label_type == 'geometric':
115 | label = scipy.io.loadmat('{}/GeoLabels/spatial_envelope_256x256_static_8outdoorcategories/{}.mat'.format(self.siftflow_dir, idx))['S']
116 | label[label == -1] = 0
117 | else:
118 | raise Exception("Unknown label type: {}. Pick semantic or geometric.".format(label_type))
119 | label = label.astype(np.uint8)
120 | label -= 1 # rotate labels so classes start at 0, void is 255
121 | label = label[np.newaxis, ...]
122 | return label.copy()
123 |
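
The label -= 1 at the end of load_label leans on uint8 wraparound: SIFT Flow stores unlabeled pixels as 0 and the classes as 1..33, so after the cast and subtraction the classes become 0..32 and the unlabeled pixels wrap to 255, which is exactly the ignore_label the loss layers use. A two-line check:

    import numpy as np
    print(np.array([0, 1, 33], dtype=np.uint8) - 1)   # [255   0  32]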
--------------------------------------------------------------------------------
/models/FCNGCN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.nn.init as init
5 | import torch.utils.model_zoo as model_zoo
6 | from torchvision import models
7 |
8 | import math
9 |
10 |
11 | class GCN(nn.Module):
12 | def __init__(self, inplanes, planes, ks=7):
13 | super(GCN, self).__init__()
14 | self.conv_l1 = nn.Conv2d(inplanes, planes, kernel_size=(ks, 1),
15 | padding=(ks // 2, 0))
16 |
17 | self.conv_l2 = nn.Conv2d(planes, planes, kernel_size=(1, ks),
18 | padding=(0, ks // 2))
19 | self.conv_r1 = nn.Conv2d(inplanes, planes, kernel_size=(1, ks),
20 | padding=(0, ks // 2))
21 | self.conv_r2 = nn.Conv2d(planes, planes, kernel_size=(ks, 1),
22 | padding=(ks // 2, 0))
23 |
24 | def forward(self, x):
25 | x_l = self.conv_l1(x)
26 | x_l = self.conv_l2(x_l)
27 |
28 | x_r = self.conv_r1(x)
29 | x_r = self.conv_r2(x_r)
30 |
31 | x = x_l + x_r
32 |
33 | return x
34 |
35 |
36 | class Refine(nn.Module):
37 | def __init__(self, planes):
38 | super(Refine, self).__init__()
39 | self.bn = nn.BatchNorm2d(planes)
40 | self.relu = nn.ReLU(inplace=True)
41 | self.conv1 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
42 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
43 |
44 | def forward(self, x):
45 | residual = x
46 | x = self.bn(x)
47 | x = self.relu(x)
48 | x = self.conv1(x)
49 | x = self.bn(x)
50 | x = self.relu(x)
51 | x = self.conv2(x)
52 |
53 | out = residual + x
54 | return out
55 |
56 |
57 | class FCNGCN(nn.Module):
58 | def __init__(self, num_input_chanel,num_classes):
59 | super(FCNGCN, self).__init__()
60 |
61 | self.num_classes = num_classes
62 | self.num_input_chanel = num_input_chanel
63 |
64 | resnet = models.resnet50(pretrained=True)
65 |
66 | # self.conv1 = resnet.conv1
67 | self.conv1 = nn.Conv2d(num_input_chanel, 64, kernel_size=7, stride=2, padding=3,
68 | bias=False)
69 | self.bn0 = resnet.bn1
70 | self.relu = resnet.relu
71 | self.maxpool = resnet.maxpool
72 |
73 | self.layer1 = resnet.layer1
74 | self.layer2 = resnet.layer2
75 | self.layer3 = resnet.layer3
76 | self.layer4 = resnet.layer4
77 |
78 | self.gcn1 = GCN(2048, self.num_classes)
79 | self.gcn2 = GCN(1024, self.num_classes)
80 | self.gcn3 = GCN(512, self.num_classes)
81 | self.gcn4 = GCN(64, self.num_classes)
82 | self.gcn5 = GCN(64, self.num_classes)
83 |
84 | self.refine1 = Refine(self.num_classes)
85 | self.refine2 = Refine(self.num_classes)
86 | self.refine3 = Refine(self.num_classes)
87 | self.refine4 = Refine(self.num_classes)
88 | self.refine5 = Refine(self.num_classes)
89 | self.refine6 = Refine(self.num_classes)
90 | self.refine7 = Refine(self.num_classes)
91 | self.refine8 = Refine(self.num_classes)
92 | self.refine9 = Refine(self.num_classes)
93 | self.refine10 = Refine(self.num_classes)
94 |
95 | self.out0 = self._classifier(2048)
96 | self.out1 = self._classifier(1024)
97 | self.out2 = self._classifier(512)
98 | self.out_e = self._classifier(256)
99 | self.out3 = self._classifier(64)
100 | self.out4 = self._classifier(64)
101 | self.out5 = self._classifier(32)
102 |
103 | self.transformer = nn.Conv2d(256, 64, kernel_size=1)
104 |
105 | def _classifier(self, inplanes):
106 | return nn.Sequential(
107 | nn.Conv2d(inplanes, inplanes // 2, 3, padding=1, bias=False),
108 | nn.BatchNorm2d(inplanes // 2),
109 | nn.ReLU(inplace=True),
110 | nn.Dropout(.1),
111 | nn.Conv2d(inplanes // 2, self.num_classes, 1),
112 | )
113 |
114 | def forward(self, x):
115 | input = x
116 | x = self.conv1(x)
117 | x = self.bn0(x)
118 | x = self.relu(x)
119 | conv_x = x
120 | x = self.maxpool(x)
121 | pool_x = x
122 |
123 | fm1 = self.layer1(x)
124 | fm2 = self.layer2(fm1)
125 | fm3 = self.layer3(fm2)
126 | fm4 = self.layer4(fm3)
127 |
128 | gcfm1 = self.refine1(self.gcn1(fm4))
129 | gcfm2 = self.refine2(self.gcn2(fm3))
130 | gcfm3 = self.refine3(self.gcn3(fm2))
131 | gcfm4 = self.refine4(self.gcn4(pool_x))
132 | gcfm5 = self.refine5(self.gcn5(conv_x))
133 |
134 | fs1 = self.refine6(F.upsample_bilinear(gcfm1, fm3.size()[2:]) + gcfm2)
135 | fs2 = self.refine7(F.upsample_bilinear(fs1, fm2.size()[2:]) + gcfm3)
136 | fs3 = self.refine8(F.upsample_bilinear(fs2, pool_x.size()[2:]) + gcfm4)
137 | fs4 = self.refine9(F.upsample_bilinear(fs3, conv_x.size()[2:]) + gcfm5)
138 | out = self.refine10(F.upsample_bilinear(fs4, input.size()[2:]))
139 |
140 | return out
141 |
142 | # return out, fs4, fs3, fs2, fs1, gcfm1
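
The decoder walks the ResNet-50 feature pyramid back up: each GCN/Refine pair produces a num_classes-channel map, and successive bilinear upsamplings plus refinements restore the input resolution. A minimal shape smoke test, assuming the old (Variable-era) PyTorch this repo targets, that the file is importable as models.FCNGCN, and that downloading the ImageNet ResNet-50 weights is acceptable:

    import torch
    from torch.autograd import Variable
    from models.FCNGCN import FCNGCN

    net = FCNGCN(num_input_chanel=1, num_classes=7)   # e.g. single-channel slices, 7 organ labels
    x = Variable(torch.randn(1, 1, 256, 256))
    y = net(x)
    print(y.size())   # expected: (1, 7, 256, 256)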
--------------------------------------------------------------------------------
/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext_layers.py:
--------------------------------------------------------------------------------
1 | import caffe
2 |
3 | import numpy as np
4 | from PIL import Image
5 | import scipy.io
6 |
7 | import random
8 |
9 | class PASCALContextSegDataLayer(caffe.Layer):
10 | """
11 | Load (input image, label image) pairs from PASCAL-Context
12 | one-at-a-time while reshaping the net to preserve dimensions.
13 |
14 | The labels follow the 59 class task defined by
15 |
16 | R. Mottaghi, X. Chen, X. Liu, N.-G. Cho, S.-W. Lee, S. Fidler, R.
17 | Urtasun, and A. Yuille. The Role of Context for Object Detection and
18 | Semantic Segmentation in the Wild. CVPR 2014.
19 |
20 | Use this to feed data to a fully convolutional network.
21 | """
22 |
23 | def setup(self, bottom, top):
24 | """
25 | Setup data layer according to parameters:
26 |
27 | - voc_dir: path to PASCAL VOC dir (must contain 2010)
28 | - context_dir: path to PASCAL-Context annotations
29 | - split: train / val / test
30 | - randomize: load in random order (default: True)
31 | - seed: seed for randomization (default: None / current time)
32 |
33 | for PASCAL-Context semantic segmentation.
34 |
35 | example: params = dict(voc_dir="/path/to/PASCAL", split="val")
36 | """
37 | # config
38 | params = eval(self.param_str)
39 | self.voc_dir = params['voc_dir'] + '/VOC2010'
40 | self.context_dir = params['context_dir']
41 | self.split = params['split']
42 | self.mean = np.array((104.007, 116.669, 122.679), dtype=np.float32)
43 | self.random = params.get('randomize', True)
44 | self.seed = params.get('seed', None)
45 |
46 | # load labels and resolve inconsistencies by mapping to full 400 labels
47 | self.labels_400 = [label.replace(' ','') for idx, label in np.genfromtxt(self.context_dir + '/labels.txt', delimiter=':', dtype=None)]
48 | self.labels_59 = [label.replace(' ','') for idx, label in np.genfromtxt(self.context_dir + '/59_labels.txt', delimiter=':', dtype=None)]
49 | for main_label, task_label in zip(('table', 'bedclothes', 'cloth'), ('diningtable', 'bedcloth', 'clothes')):
50 | self.labels_59[self.labels_59.index(task_label)] = main_label
51 |
52 | # two tops: data and label
53 | if len(top) != 2:
54 | raise Exception("Need to define two tops: data and label.")
55 | # data layers have no bottoms
56 | if len(bottom) != 0:
57 | raise Exception("Do not define a bottom.")
58 |
59 | # load indices for images and labels
60 | split_f = '{}/ImageSets/Main/{}.txt'.format(self.voc_dir,
61 | self.split)
62 | self.indices = open(split_f, 'r').read().splitlines()
63 | self.idx = 0
64 |
65 | # make eval deterministic
66 | if 'train' not in self.split:
67 | self.random = False
68 |
69 | # randomization: seed and pick
70 | if self.random:
71 | random.seed(self.seed)
72 | self.idx = random.randint(0, len(self.indices)-1)
73 |
74 | def reshape(self, bottom, top):
75 | # load image + label image pair
76 | self.data = self.load_image(self.indices[self.idx])
77 | self.label = self.load_label(self.indices[self.idx])
78 | # reshape tops to fit (leading 1 is for batch dimension)
79 | top[0].reshape(1, *self.data.shape)
80 | top[1].reshape(1, *self.label.shape)
81 |
82 | def forward(self, bottom, top):
83 | # assign output
84 | top[0].data[...] = self.data
85 | top[1].data[...] = self.label
86 |
87 | # pick next input
88 | if self.random:
89 | self.idx = random.randint(0, len(self.indices)-1)
90 | else:
91 | self.idx += 1
92 | if self.idx == len(self.indices):
93 | self.idx = 0
94 |
95 | def backward(self, top, propagate_down, bottom):
96 | pass
97 |
98 | def load_image(self, idx):
99 | """
100 | Load input image and preprocess for Caffe:
101 | - cast to float
102 | - switch channels RGB -> BGR
103 | - subtract mean
104 | - transpose to channel x height x width order
105 | """
106 | im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
107 | in_ = np.array(im, dtype=np.float32)
108 | in_ = in_[:,:,::-1]
109 | in_ -= self.mean
110 | in_ = in_.transpose((2,0,1))
111 | return in_
112 |
113 | def load_label(self, idx):
114 | """
115 | Load label image as 1 x height x width integer array of label indices.
116 | The leading singleton dimension is required by the loss.
117 | The full 400 labels are translated to the 59 class task labels.
118 | """
119 | label_400 = scipy.io.loadmat('{}/trainval/{}.mat'.format(self.context_dir, idx))['LabelMap']
120 | label = np.zeros_like(label_400, dtype=np.uint8)
121 | for idx, l in enumerate(self.labels_59):
122 | idx_400 = self.labels_400.index(l) + 1
123 | label[label_400 == idx_400] = idx + 1
124 | label = label[np.newaxis, ...]
125 | return label
126 |
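
As with the other Python data layers in this tree, all configuration arrives through param_str and is parsed with eval in setup(). A sketch of how a net.py would wire the layer up (the directory values are illustrative, not taken from this repo):

    n.data, n.label = L.Python(module='pascalcontext_layers',
            layer='PASCALContextSegDataLayer', ntop=2,
            param_str=str(dict(voc_dir='../data/pascal',
                    context_dir='../data/pascal-context',
                    split='train', seed=1337)))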
--------------------------------------------------------------------------------