├── data ├── __init__.py ├── __init__.pyc ├── yh_dataset.pyc ├── base_dataset.pyc ├── data_loader.pyc ├── image_folder.pyc ├── random_crop_yh.pyc ├── yh_seg_dataset.pyc ├── base_data_loader.pyc ├── unaligned_dataset.pyc ├── yh_test_seg_dataset.pyc ├── custom_dataset_data_loader.pyc ├── data_loader.py ├── base_data_loader.py ├── single_dataset.py ├── unaligned_dataset.py ├── yh_dataset.py ├── random_crop_yh.py ├── custom_dataset_data_loader.py ├── aligned_dataset.py ├── image_folder.py ├── base_dataset.py ├── yh_seg_spleenonly_dataset.py ├── yh_test_seg_dataset.py └── yh_seg_dataset.py ├── models ├── __init__.py ├── FCNGCN.pyc ├── models.pyc ├── __init__.pyc ├── networks.pyc ├── base_model.pyc ├── test_model.pyc ├── cycle_gan_model.pyc ├── cycle_seg_model.pyc ├── test_seg_model.pyc ├── models.py ├── test_model.py ├── base_model.py ├── test_seg_model.py └── FCNGCN.py ├── util ├── __init__.py ├── html.pyc ├── util.pyc ├── __init__.pyc ├── image_pool.pyc ├── visualizer.pyc ├── png.py ├── image_pool.py ├── html.py ├── util.py └── get_data.py ├── options ├── __init__.py ├── __init__.pyc ├── base_options.pyc ├── train_options.pyc ├── .idea │ ├── misc.xml │ ├── modules.xml │ └── options.iml ├── test_options.py └── train_options.py ├── torchsrc ├── models │ ├── __init__.py │ └── __init__.pyc ├── utils │ ├── __init__.py │ ├── __init__.pyc │ ├── image_pool.pyc │ ├── image_pool.py │ └── util.py ├── datasets │ ├── apc │ │ ├── data │ │ │ └── mit_training_blacklist.yaml │ │ ├── jsk.pyc │ │ ├── rbo.pyc │ │ ├── v1.pyc │ │ ├── v2.pyc │ │ ├── v3.pyc │ │ ├── base.pyc │ │ ├── __init__.pyc │ │ ├── mit_benchmark.pyc │ │ ├── mit_training.pyc │ │ ├── __init__.py │ │ ├── v1.py │ │ ├── v2.py │ │ ├── v3.py │ │ ├── jsk.py │ │ ├── base.py │ │ ├── rbo.py │ │ ├── mit_training.py │ │ └── mit_benchmark.py │ ├── voc.pyc │ ├── __init__.pyc │ └── __init__.py ├── utils.pyc ├── __init__.pyc ├── trainer.pyc ├── __init__.py ├── ext │ └── fcn.berkeleyvision.org │ │ ├── voc-fcn16s │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── voc-fcn32s │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── voc-fcn8s │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── nyud-fcn32s-hha │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── siftflow-fcn16s │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── siftflow-fcn32s │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── siftflow-fcn8s │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ └── solve.py │ │ ├── voc-fcn-alexnet │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── voc-fcn8s-atonce │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── nyud-fcn32s-color │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── pascalcontext-fcn16s │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── pascalcontext-fcn32s │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── pascalcontext-fcn8s │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── nyud-fcn32s-color-hha │ │ ├── caffemodel-url │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── data │ │ ├── pascal │ │ │ ├── classes.txt │ │ │ └── README.md │ │ ├── nyud │ │ │ ├── classes.txt │ │ │ ├── README.md │ │ │ ├── train.txt │ │ │ └── val.txt │ │ 
├── sift-flow │ │ │ ├── README.md │ │ │ ├── classes.txt │ │ │ └── test.txt │ │ └── pascal-context │ │ │ ├── classes-59.txt │ │ │ └── README.md │ │ ├── nyud-fcn32s-color-d │ │ ├── solver.prototxt │ │ ├── solve.py │ │ └── net.py │ │ ├── ilsvrc-nets │ │ └── README.md │ │ ├── infer.py │ │ ├── voc_helper.py │ │ ├── score.py │ │ ├── surgery.py │ │ ├── siftflow_layers.py │ │ └── pascalcontext_layers.py └── utils.py ├── imgs ├── Figure1.jpg ├── Figure2.jpg └── Figure3.jpg ├── sublist.py ├── LICENSE1 ├── LICENSE2 └── README.md /data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /options/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /torchsrc/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /torchsrc/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /torchsrc/datasets/apc/data/mit_training_blacklist.yaml: -------------------------------------------------------------------------------- 1 | - 59651 2 | - 87744 3 | -------------------------------------------------------------------------------- /util/html.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/html.pyc -------------------------------------------------------------------------------- /util/util.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/util.pyc -------------------------------------------------------------------------------- /data/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/__init__.pyc -------------------------------------------------------------------------------- /imgs/Figure1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/imgs/Figure1.jpg -------------------------------------------------------------------------------- /imgs/Figure2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/imgs/Figure2.jpg -------------------------------------------------------------------------------- /imgs/Figure3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/imgs/Figure3.jpg -------------------------------------------------------------------------------- /models/FCNGCN.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/FCNGCN.pyc -------------------------------------------------------------------------------- /models/models.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/models.pyc -------------------------------------------------------------------------------- /util/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/__init__.pyc -------------------------------------------------------------------------------- /data/yh_dataset.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/yh_dataset.pyc -------------------------------------------------------------------------------- /models/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/__init__.pyc -------------------------------------------------------------------------------- /models/networks.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/networks.pyc -------------------------------------------------------------------------------- /torchsrc/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/utils.pyc -------------------------------------------------------------------------------- /util/image_pool.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/image_pool.pyc -------------------------------------------------------------------------------- /util/visualizer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/util/visualizer.pyc -------------------------------------------------------------------------------- /data/base_dataset.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/base_dataset.pyc -------------------------------------------------------------------------------- /data/data_loader.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/data_loader.pyc -------------------------------------------------------------------------------- /data/image_folder.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/image_folder.pyc -------------------------------------------------------------------------------- /models/base_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/base_model.pyc -------------------------------------------------------------------------------- /models/test_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/test_model.pyc 
-------------------------------------------------------------------------------- /options/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/options/__init__.pyc -------------------------------------------------------------------------------- /torchsrc/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/__init__.pyc -------------------------------------------------------------------------------- /torchsrc/trainer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/trainer.pyc -------------------------------------------------------------------------------- /data/random_crop_yh.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/random_crop_yh.pyc -------------------------------------------------------------------------------- /data/yh_seg_dataset.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/yh_seg_dataset.pyc -------------------------------------------------------------------------------- /options/base_options.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/options/base_options.pyc -------------------------------------------------------------------------------- /data/base_data_loader.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/base_data_loader.pyc -------------------------------------------------------------------------------- /data/unaligned_dataset.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/unaligned_dataset.pyc -------------------------------------------------------------------------------- /models/cycle_gan_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/cycle_gan_model.pyc -------------------------------------------------------------------------------- /models/cycle_seg_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/cycle_seg_model.pyc -------------------------------------------------------------------------------- /models/test_seg_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/models/test_seg_model.pyc -------------------------------------------------------------------------------- /options/train_options.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/options/train_options.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/voc.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/voc.pyc 
-------------------------------------------------------------------------------- /torchsrc/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/utils/__init__.pyc -------------------------------------------------------------------------------- /data/yh_test_seg_dataset.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/yh_test_seg_dataset.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/apc/jsk.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/jsk.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/apc/rbo.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/rbo.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/apc/v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/v1.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/apc/v2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/v2.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/apc/v3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/v3.pyc -------------------------------------------------------------------------------- /torchsrc/models/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/models/__init__.pyc -------------------------------------------------------------------------------- /torchsrc/utils/image_pool.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/utils/image_pool.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/__init__.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/apc/base.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/base.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/apc/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/__init__.pyc -------------------------------------------------------------------------------- /data/custom_dataset_data_loader.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/data/custom_dataset_data_loader.pyc -------------------------------------------------------------------------------- /torchsrc/__init__.py: -------------------------------------------------------------------------------- 1 | from . import models # NOQA 2 | from .trainer import Trainer # NOQA 3 | from . import utils # NOQA 4 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/fcn16s-heavy-pascal.caffemodel -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/fcn32s-heavy-pascal.caffemodel -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel -------------------------------------------------------------------------------- /torchsrc/datasets/apc/mit_benchmark.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/mit_benchmark.pyc -------------------------------------------------------------------------------- /torchsrc/datasets/apc/mit_training.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/SynSeg-Net/HEAD/torchsrc/datasets/apc/mit_training.pyc -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/nyud-fcn32s-hha-heavy.caffemodel 2 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/siftflow-fcn16s-heavy.caffemodel 2 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/siftflow-fcn32s-heavy.caffemodel 2 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/siftflow-fcn8s-heavy.caffemodel 2 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/fcn-alexnet-pascal.caffemodel 2 | 
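The caffemodel-url files above only record download locations; the pretrained weights themselves are not checked in. Below is a minimal, hypothetical helper (an assumption, not a script shipped in this tree; it uses Python 3's urllib and an illustrative MODEL_DIRS list) showing how those URL files could be read and the weights fetched into the matching model directories.

```python
# Hypothetical convenience script (not part of the repository): read each
# caffemodel-url file shown above and download the weights next to it.
import os
import urllib.request

MODEL_DIRS = ['voc-fcn32s', 'voc-fcn16s', 'voc-fcn8s']  # extend with the other model dirs as needed

for model_dir in MODEL_DIRS:
    with open(os.path.join(model_dir, 'caffemodel-url')) as f:
        url = f.read().strip()
    dest = os.path.join(model_dir, os.path.basename(url))
    if not os.path.exists(dest):
        print('fetching %s -> %s' % (url, dest))
        urllib.request.urlretrieve(url, dest)
```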
-------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/fcn8s-atonce-pascal.caffemodel 2 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/nyud-fcn32s-color-heavy.caffemodel 2 | -------------------------------------------------------------------------------- /torchsrc/datasets/apc/__init__.py: -------------------------------------------------------------------------------- 1 | from v1 import APC2016V1 # NOQA 2 | from v2 import APC2016V2 # NOQA 3 | from v3 import APC2016V3 # NOQA 4 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/pascalcontext-fcn16s-heavy.caffemodel 2 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/pascalcontext-fcn32s-heavy.caffemodel 2 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/pascalcontext-fcn8s-heavy.caffemodel 2 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/caffemodel-url: -------------------------------------------------------------------------------- 1 | http://dl.caffe.berkeleyvision.org/nyud-fcn32s-color-hha-heavy.caffemodel 2 | -------------------------------------------------------------------------------- /data/data_loader.py: -------------------------------------------------------------------------------- 1 | 2 | def CreateDataLoader(opt): 3 | from data.custom_dataset_data_loader import CustomDatasetDataLoader 4 | data_loader = CustomDatasetDataLoader() 5 | print(data_loader.name()) 6 | data_loader.initialize(opt) 7 | return data_loader 8 | -------------------------------------------------------------------------------- /torchsrc/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .apc import APC2016V1 # NOQA 2 | from .apc import APC2016V2 # NOQA 3 | from .apc import APC2016V3 # NOQA 4 | from .voc import SBDClassSeg # NOQA 5 | from .voc import VOC2011ClassSeg # NOQA 6 | from .voc import VOC2012ClassSeg # NOQA 7 | -------------------------------------------------------------------------------- /data/base_data_loader.py: -------------------------------------------------------------------------------- 1 | 2 | class BaseDataLoader(): 3 | def __init__(self): 4 | pass 5 | 6 | def initialize(self, opt): 7 | self.opt = opt 8 | pass 9 | 10 | def load_data(): 11 | return None 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /options/.idea/misc.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /options/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/pascal/classes.txt: -------------------------------------------------------------------------------- 1 | background 2 | aeroplane 3 | bicycle 4 | bird 5 | boat 6 | bottle 7 | bus 8 | car 9 | cat 10 | chair 11 | cow 12 | diningtable 13 | dog 14 | horse 15 | motorbike 16 | person 17 | pottedplant 18 | sheep 19 | sofa 20 | train 21 | tvmonitor 22 | 23 | and 255 is the ignore label that marks pixels excluded from learning and 24 | evaluation by the PASCAL VOC ground truth. 25 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "trainval.prototxt" 2 | test_net: "test.prototxt" 3 | test_iter: 200 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-12 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | test_initialization: false 18 | -------------------------------------------------------------------------------- /options/.idea/options.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "trainval.prototxt" 2 | test_net: "test.prototxt" 3 | test_iter: 200 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-12 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | test_initialization: false 18 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "trainval.prototxt" 2 | test_net: "test.prototxt" 3 | test_iter: 200 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-10 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | test_initialization: false 18 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "train.prototxt" 2 | test_net: "val.prototxt" 3 | test_iter: 736 
4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-14 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 100000 16 | weight_decay: 0.0005 17 | snapshot: 4000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "train.prototxt" 2 | test_net: "val.prototxt" 3 | test_iter: 736 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for normalized softmax 10 | base_lr: 1e-4 11 | # standard momentum 12 | momentum: 0.9 13 | # gradient accumulation 14 | iter_size: 20 15 | max_iter: 100000 16 | weight_decay: 0.0005 17 | snapshot: 4000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "train.prototxt" 2 | test_net: "val.prototxt" 3 | test_iter: 736 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-12 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 100000 16 | weight_decay: 0.0005 17 | snapshot: 4000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "train.prototxt" 2 | test_net: "val.prototxt" 3 | test_iter: 736 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-10 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 100000 16 | weight_decay: 0.0005 17 | snapshot: 4000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "trainval.prototxt" 2 | test_net: "test.prototxt" 3 | test_iter: 654 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-10 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | snapshot: 2000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- 
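The solver.prototxt files above (and the ones that follow) share one pattern: a fixed learning-rate policy, a very small base_lr paired with the unnormalized (per-pixel summed) softmax loss, high momentum, and optional gradient accumulation via iter_size. As a hedged illustration only — not a script from this repository, and assuming a working Caffe installation with its protobuf bindings — such a file can be parsed and inspected like this:

```python
# Illustrative only: parse one of the solver.prototxt files above with Caffe's
# protobuf definitions and print the fields discussed in its comments.
from caffe.proto import caffe_pb2
from google.protobuf import text_format

solver_param = caffe_pb2.SolverParameter()
with open('voc-fcn32s/solver.prototxt') as f:
    text_format.Merge(f.read(), solver_param)

# base_lr is tiny because the loss is an unnormalized (summed) softmax;
# iter_size > 1 would emulate a larger batch by accumulating gradients.
print(solver_param.lr_policy, solver_param.base_lr,
      solver_param.momentum, solver_param.iter_size, solver_param.max_iter)
```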
/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "train.prototxt" 2 | test_net: "val.prototxt" 3 | test_iter: 736 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-10 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | snapshot: 4000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "trainval.prototxt" 2 | test_net: "test.prototxt" 3 | test_iter: 654 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-10 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | snapshot: 2000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "trainval.prototxt" 2 | test_net: "test.prototxt" 3 | test_iter: 654 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-10 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | snapshot: 2000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "train.prototxt" 2 | test_net: "val.prototxt" 3 | test_iter: 5105 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-12 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | snapshot: 4000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "train.prototxt" 2 | test_net: "val.prototxt" 3 | test_iter: 5105 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-10 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 
14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | snapshot: 4000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "train.prototxt" 2 | test_net: "val.prototxt" 3 | test_iter: 5105 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-14 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | snapshot: 4000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "trainval.prototxt" 2 | test_net: "test.prototxt" 3 | test_iter: 654 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-12 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 300000 16 | weight_decay: 0.0005 17 | snapshot: 2000 18 | snapshot_prefix: "snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/nyud/classes.txt: -------------------------------------------------------------------------------- 1 | wall 2 | floor 3 | cabinet 4 | bed 5 | chair 6 | sofa 7 | table 8 | door 9 | window 10 | bookshelf 11 | picture 12 | counter 13 | blinds 14 | desk 15 | shelves 16 | curtain 17 | dresser 18 | pillow 19 | mirror 20 | floor mat 21 | clothes 22 | ceiling 23 | books 24 | refridgerator 25 | television 26 | paper 27 | towel 28 | shower curtain 29 | box 30 | whiteboard 31 | person 32 | night stand 33 | toilet 34 | sink 35 | lamp 36 | bathtub 37 | bag 38 | otherstructure 39 | otherfurniture 40 | otherprop 41 | 42 | and 0 is void (and converted to 255 by the NYUDSegDataLayer) 43 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/ilsvrc-nets/README.md: -------------------------------------------------------------------------------- 1 | # ILSVRC Networks 2 | 3 | These classification networks are trained on ILSVRC for object recognition. 4 | We cast these nets into fully convolutional form to make use of their parameters as pre-training. 5 | 6 | To reproduce our FCNs, or train your own on your own data, you need to first collect the corresponding base network. 
7 | 8 | - [VGG16](https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md) 9 | - [CaffeNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_reference_caffenet) 10 | - [BVLC GoogLeNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet) 11 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/infer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | import caffe 5 | 6 | # load image, switch to BGR, subtract mean, and make dims C x H x W for Caffe 7 | im = Image.open('pascal/VOC2010/JPEGImages/2007_000129.jpg') 8 | in_ = np.array(im, dtype=np.float32) 9 | in_ = in_[:,:,::-1] 10 | in_ -= np.array((104.00698793,116.66876762,122.67891434)) 11 | in_ = in_.transpose((2,0,1)) 12 | 13 | # load net 14 | net = caffe.Net('voc-fcn8s/deploy.prototxt', 'voc-fcn8s/fcn8s-heavy-pascal.caffemodel', caffe.TEST) 15 | # shape for input (data blob is N x C x H x W), set data 16 | net.blobs['data'].reshape(1, *in_.shape) 17 | net.blobs['data'].data[...] = in_ 18 | # run net and take argmax for prediction 19 | net.forward() 20 | out = net.blobs['score'].data[0].argmax(axis=0) 21 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/README.md: -------------------------------------------------------------------------------- 1 | # SIFT Flow 2 | 3 | SIFT Flow is a semantic segmentation dataset with two labelings: 4 | 5 | - semantic classes, such as "cat" or "dog" 6 | - geometric classes, consisting of "horizontal, vertical, and sky" 7 | 8 | Refer to `classes.txt` for the listing of classes in model output order. 9 | Refer to `../siftflow_layers.py` for the Python data layer for this dataset. 10 | 11 | Note that the dataset has a number of issues, including unannotated images and missing classes from the test set. 12 | The provided splits exclude the unannotated images. 13 | As noted in the paper, care must be taken for proper evalution by excluding the missing classes. 
14 | 15 | Download the dataset: 16 | http://www.cs.unc.edu/~jtighe/Papers/ECCV10/siftflow/SiftFlowDataset.zip 17 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../voc-fcn32s/voc-fcn32s.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str) 29 | 30 | for _ in range(25): 31 | solver.step(4000) 32 | score.seg_tests(solver, False, val, layer='score') 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str) 29 | 30 | for _ in range(25): 31 | solver.step(4000) 32 | score.seg_tests(solver, False, val, layer='score') 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../voc-fcn16s/voc-fcn16s.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str) 29 | 30 | for _ in range(25): 31 | solver.step(4000) 32 | score.seg_tests(solver, False, val, layer='score') 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 
| except: 12 | pass 13 | 14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | test = np.loadtxt('../data/nyud/test.txt', dtype=str) 29 | 30 | for _ in range(50): 31 | solver.step(2000) 32 | score.seg_tests(solver, False, test, layer='score') 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | test = np.loadtxt('../data/nyud/test.txt', dtype=str) 29 | 30 | for _ in range(50): 31 | solver.step(2000) 32 | score.seg_tests(solver, False, test, layer='score') 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../ilsvrc-nets/alexnet-fcn.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str) 29 | 30 | for _ in range(25): 31 | solver.step(4000) 32 | score.seg_tests(solver, False, val, layer='score') 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | val = np.loadtxt('../data/segvalid11.txt', dtype=str) 29 | 30 | for _ in range(75): 31 |
solver.step(4000) 32 | score.seg_tests(solver, False, val, layer='score') 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | val = np.loadtxt('../data/pascal/VOC2010/ImageSets/Main/val.txt', dtype=str) 29 | 30 | for _ in range(50): 31 | solver.step(8000) 32 | score.seg_tests(solver, False, val, layer='score') 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../pascalcontext-fcn32s/pascalcontext-fcn32s.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | val = np.loadtxt('../data/pascal/VOC2010/ImageSets/Main/val.txt', dtype=str) 29 | 30 | for _ in range(50): 31 | solver.step(8000) 32 | score.seg_tests(solver, False, val, layer='score') 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../pascalcontext-fcn16s/pascalcontext-fcn16s.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | val = np.loadtxt('../data/pascal/VOC2010/ImageSets/Main/val.txt', dtype=str) 29 | 30 | for _ in range(50): 31 | solver.step(8000) 32 | score.seg_tests(solver, False, val, layer='score') 33 | -------------------------------------------------------------------------------- /options/test_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class TestOptions(BaseOptions): 5 | def initialize(self): 6 | 
BaseOptions.initialize(self) 7 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.') 8 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.') 9 | self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images') 10 | self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 11 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') 12 | self.parser.add_argument('--how_many', type=int, default=50, help='how many test images to run') 13 | self.isTrain = False 14 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/nyud/README.md: -------------------------------------------------------------------------------- 1 | # NYUDv2: NYU Depth Dataset V2 2 | 3 | NYUDv2 has a curated semantic segmentation challenge with RGB-D inputs and full scene labels of objects and surfaces. 4 | While there are many labels, we follow the 40 class task defined by 5 | 6 | > Perceptual Organization and Recognition of Indoor Scenes from RGB-D Images. 7 | Saurabh Gupta, Pablo Arbelaez, and Jitendra Malik. 8 | CVPR 2013 9 | 10 | at http://www.cs.berkeley.edu/~sgupta/pdf/GuptaArbelaezMalikCVPR13.pdf . 11 | To reproduce the results of our paper, you must make use of the data from Gupta et al. at http://people.eecs.berkeley.edu/~sgupta/cvpr13/data.tgz . 12 | 13 | Refer to `classes.txt` for the listing of classes in model output order. 14 | Refer to `../nyud_layers.py` for the Python data layer for this dataset. 15 | 16 | See the dataset site: http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html. 
17 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/classes-59.txt: -------------------------------------------------------------------------------- 1 | 0: background 2 | 1: aeroplane 3 | 2: bicycle 4 | 3: bird 5 | 4: boat 6 | 5: bottle 7 | 6: bus 8 | 7: car 9 | 8: cat 10 | 9: chair 11 | 10: cow 12 | 11: diningtable 13 | 12: dog 14 | 13: horse 15 | 14: motorbike 16 | 15: person 17 | 16: pottedplant 18 | 17: sheep 19 | 18: sofa 20 | 19: train 21 | 20: tvmonitor 22 | 21: bag 23 | 22: bed 24 | 23: bench 25 | 24: book 26 | 25: building 27 | 26: cabinet 28 | 27: ceiling 29 | 28: clothes 30 | 29: computer 31 | 30: cup 32 | 31: door 33 | 32: fence 34 | 33: floor 35 | 34: flower 36 | 35: food 37 | 36: grass 38 | 37: ground 39 | 38: keyboard 40 | 39: light 41 | 40: mountain 42 | 41: mouse 43 | 42: curtain 44 | 43: platform 45 | 44: sign 46 | 45: plate 47 | 46: road 48 | 47: rock 49 | 48: shelves 50 | 49: sidewalk 51 | 50: sky 52 | 51: snow 53 | 52: bedcloth 54 | 53: track 55 | 54: tree 56 | 55: truck 57 | 56: wall 58 | 57: water 59 | 58: window 60 | 59: wood 61 | -------------------------------------------------------------------------------- /data/single_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | from data.base_dataset import BaseDataset, get_transform 4 | from data.image_folder import make_dataset 5 | from PIL import Image 6 | 7 | 8 | class SingleDataset(BaseDataset): 9 | def initialize(self, opt): 10 | self.opt = opt 11 | self.root = opt.dataroot 12 | self.dir_A = os.path.join(opt.dataroot) 13 | 14 | self.A_paths = make_dataset(self.dir_A) 15 | 16 | self.A_paths = sorted(self.A_paths) 17 | 18 | self.transform = get_transform(opt) 19 | 20 | def __getitem__(self, index): 21 | A_path = self.A_paths[index] 22 | 23 | A_img = Image.open(A_path).convert('RGB') 24 | 25 | A_img = self.transform(A_img) 26 | 27 | return {'A': A_img, 'A_paths': A_path} 28 | 29 | def __len__(self): 30 | return len(self.A_paths) 31 | 32 | def name(self): 33 | return 'SingleImageDataset' 34 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/README.md: -------------------------------------------------------------------------------- 1 | # PASCAL-Context 2 | 3 | PASCAL-Context is a full object and scene labeling of PASCAL VOC 2010. 4 | It includes both object (cat, dog, ...) and surface (sky, grass, ...) classes. 5 | 6 | We follow the 59 class task defined by 7 | 8 | > The Role of Context for Object Detection and Semantic Segmentation in the Wild. 9 | Roozbeh Mottaghi, Xianjie Chen, Xiaobai Liu, Nam-Gyu Cho, Seong-Whan Lee, Sanja Fidler, Raquel Urtasun, and Alan Yuille. 10 | CVPR 2014 11 | 12 | which selects the 59 most common classes for learning and evaluation. 13 | 14 | Refer to `classes-59.txt` for the listing of classes in model output order. 15 | Refer to `../pascalcontext_layers.py` for the Python data layer for this dataset. 16 | 17 | Note that care must be taken to map the raw class annotations into the 59 class task, as handled by our data layer. 
18 | 19 | See the dataset site: http://www.cs.stanford.edu/~roozbeh/pascal-context/ 20 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/classes.txt: -------------------------------------------------------------------------------- 1 | Semantic and geometric segmentation classes for scenes. 2 | 3 | Semantic: 0 is void and 1–33 are classes. 4 | 5 | 01 awning 6 | 02 balcony 7 | 03 bird 8 | 04 boat 9 | 05 bridge 10 | 06 building 11 | 07 bus 12 | 08 car 13 | 09 cow 14 | 10 crosswalk 15 | 11 desert 16 | 12 door 17 | 13 fence 18 | 14 field 19 | 15 grass 20 | 16 moon 21 | 17 mountain 22 | 18 person 23 | 19 plant 24 | 20 pole 25 | 21 river 26 | 22 road 27 | 23 rock 28 | 24 sand 29 | 25 sea 30 | 26 sidewalk 31 | 27 sign 32 | 28 sky 33 | 29 staircase 34 | 30 streetlight 35 | 31 sun 36 | 32 tree 37 | 33 window 38 | 39 | Geometric: -1 is void and 1–3 are classes. 40 | 41 | 01 sky 42 | 02 horizontal 43 | 03 vertical 44 | 45 | N.B. Three classes (cow, desert, and moon) are absent from the test set, so 46 | they are excluded from evaluation. The highway_bost181 and street_urb506 images 47 | are missing annotations so these are likewise excluded from evaluation. 48 | -------------------------------------------------------------------------------- /torchsrc/datasets/apc/v1.py: -------------------------------------------------------------------------------- 1 | from base import APC2016Base 2 | from jsk import APC2016jsk 3 | from rbo import APC2016rbo 4 | 5 | 6 | class APC2016V1(APC2016Base): 7 | 8 | def __init__(self, split='train', transform=False): 9 | self.datasets = [ 10 | APC2016jsk(split, transform), 11 | APC2016rbo(split, transform), 12 | ] 13 | 14 | def __len__(self): 15 | return sum(len(d) for d in self.datasets) 16 | 17 | @property 18 | def split(self): 19 | split = self.datasets[0].split 20 | assert all(d.split == split for d in self.datasets) 21 | return split 22 | 23 | @split.setter 24 | def split(self, value): 25 | for d in self.datasets: 26 | d.split = value 27 | 28 | def __getitem__(self, index): 29 | skipped = 0 30 | for dataset in self.datasets: 31 | current_index = index - skipped 32 | if current_index < len(dataset): 33 | return dataset[current_index] 34 | skipped += len(dataset) 35 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | test = np.loadtxt('../data/sift-flow/test.txt', dtype=str) 29 | 30 | for _ in range(50): 31 | solver.step(2000) 32 | # N.B. metrics on the semantic labels are off b.c. 
of missing classes; 33 | # score manually from the histogram instead for proper evaluation 34 | score.seg_tests(solver, False, test, layer='score_sem', gt='sem') 35 | score.seg_tests(solver, False, test, layer='score_geo', gt='geo') 36 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../siftflow-fcn32s/siftflow-fcn32s.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | test = np.loadtxt('../data/sift-flow/test.txt', dtype=str) 29 | 30 | for _ in range(50): 31 | solver.step(2000) 32 | # N.B. metrics on the semantic labels are off b.c. of missing classes; 33 | # score manually from the histogram instead for proper evaluation 34 | score.seg_tests(solver, False, test, layer='score_sem', gt='sem') 35 | score.seg_tests(solver, False, test, layer='score_geo', gt='geo') 36 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../siftflow-fcn16s/siftflow-fcn16s.caffemodel' 15 | 16 | # init 17 | caffe.set_device(int(sys.argv[1])) 18 | caffe.set_mode_gpu() 19 | 20 | solver = caffe.SGDSolver('solver.prototxt') 21 | solver.net.copy_from(weights) 22 | 23 | # surgeries 24 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 25 | surgery.interp(solver.net, interp_layers) 26 | 27 | # scoring 28 | test = np.loadtxt('../data/sift-flow/test.txt', dtype=str) 29 | 30 | for _ in range(50): 31 | solver.step(2000) 32 | # N.B. metrics on the semantic labels are off b.c. of missing classes; 33 | # score manually from the histogram instead for proper evaluation 34 | score.seg_tests(solver, False, test, layer='score_sem', gt='sem') 35 | score.seg_tests(solver, False, test, layer='score_geo', gt='geo') 36 | -------------------------------------------------------------------------------- /util/png.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import zlib 3 | 4 | def encode(buf, width, height): 5 | """ buf: must be bytes or a bytearray in py3, a regular string in py2. formatted RGBRGB... 
""" 6 | assert (width * height * 3 == len(buf)) 7 | bpp = 3 8 | 9 | def raw_data(): 10 | # reverse the vertical line order and add null bytes at the start 11 | row_bytes = width * bpp 12 | for row_start in range((height - 1) * width * bpp, -1, -row_bytes): 13 | yield b'\x00' 14 | yield buf[row_start:row_start + row_bytes] 15 | 16 | def chunk(tag, data): 17 | return [ 18 | struct.pack("!I", len(data)), 19 | tag, 20 | data, 21 | struct.pack("!I", 0xFFFFFFFF & zlib.crc32(data, zlib.crc32(tag))) 22 | ] 23 | 24 | SIGNATURE = b'\x89PNG\r\n\x1a\n' 25 | COLOR_TYPE_RGB = 2 26 | COLOR_TYPE_RGBA = 6 27 | bit_depth = 8 28 | return b''.join( 29 | [ SIGNATURE ] + 30 | chunk(b'IHDR', struct.pack("!2I5B", width, height, bit_depth, COLOR_TYPE_RGB, 0, 0, 0)) + 31 | chunk(b'IDAT', zlib.compress(b''.join(raw_data()), 9)) + 32 | chunk(b'IEND', b'') 33 | ) 34 | -------------------------------------------------------------------------------- /torchsrc/datasets/apc/v2.py: -------------------------------------------------------------------------------- 1 | from base import APC2016Base 2 | from jsk import APC2016jsk 3 | from mit_benchmark import APC2016mit_benchmark 4 | from rbo import APC2016rbo 5 | 6 | 7 | class APC2016V2(APC2016Base): 8 | 9 | def __init__(self, split, transform): 10 | self.datasets = [ 11 | APC2016jsk(split, transform), 12 | APC2016rbo(split, transform), 13 | APC2016mit_benchmark(split, transform), 14 | ] 15 | 16 | def __len__(self): 17 | return sum(len(d) for d in self.datasets) 18 | 19 | @property 20 | def split(self): 21 | split = self.datasets[0].split 22 | assert all(d.split == split for d in self.datasets) 23 | return split 24 | 25 | @split.setter 26 | def split(self, value): 27 | for d in self.datasets: 28 | d.split = value 29 | 30 | def __getitem__(self, index): 31 | skipped = 0 32 | for dataset in self.datasets: 33 | current_index = index - skipped 34 | if current_index < len(dataset): 35 | return dataset[current_index] 36 | skipped += len(dataset) 37 | -------------------------------------------------------------------------------- /torchsrc/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def _fast_hist(label_true, label_pred, n_class): 5 | mask = (label_true >= 0) & (label_true < n_class) 6 | hist = np.bincount( 7 | n_class * label_true[mask].astype(int) + 8 | label_pred[mask], minlength=n_class**2).reshape(n_class, n_class) 9 | return hist 10 | 11 | 12 | def label_accuracy_score(label_trues, label_preds, n_class): 13 | """Returns accuracy score evaluation result. 
14 | 15 | - overall accuracy 16 | - mean accuracy 17 | - mean IU 18 | - fwavacc 19 | """ 20 | hist = np.zeros((n_class, n_class)) 21 | for lt, lp in zip(label_trues, label_preds): 22 | hist += _fast_hist(lt.flatten(), lp.flatten(), n_class) 23 | acc = np.diag(hist).sum() / hist.sum() 24 | acc_cls = np.diag(hist) / hist.sum(axis=1) 25 | acc_cls = np.nanmean(acc_cls) 26 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) 27 | mean_iu = np.nanmean(iu) 28 | freq = hist.sum(axis=1) / hist.sum() 29 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 30 | return acc, acc_cls, mean_iu, fwavacc 31 | 32 | 33 | -------------------------------------------------------------------------------- /models/models.py: -------------------------------------------------------------------------------- 1 | 2 | def create_model(opt): 3 | model = None 4 | print(opt.model) 5 | if opt.model == 'cycle_gan': 6 | assert(opt.dataset_mode == 'unaligned' or opt.dataset_mode == 'yh') 7 | from .cycle_gan_model import CycleGANModel 8 | model = CycleGANModel() 9 | elif opt.model == 'pix2pix': 10 | assert(opt.dataset_mode == 'aligned') 11 | from .pix2pix_model import Pix2PixModel 12 | model = Pix2PixModel() 13 | elif opt.model == 'cycle_seg': 14 | assert(opt.dataset_mode == 'yh_seg' or opt.dataset_mode == 'yh_seg_spleen') 15 | from .cycle_seg_model import CycleSEGModel 16 | model = CycleSEGModel() 17 | elif opt.model == 'test': 18 | assert(opt.dataset_mode == 'yh_seg') 19 | from .test_model import TestModel 20 | model = TestModel() 21 | elif opt.model == 'test_seg': 22 | assert(opt.dataset_mode == 'yh_test_seg') 23 | from .test_seg_model import TestSegModel 24 | model = TestSegModel() 25 | else: 26 | raise ValueError("Model [%s] not recognized." % opt.model) 27 | model.initialize(opt) 28 | print("model [%s] was created" % (model.name())) 29 | return model 30 | -------------------------------------------------------------------------------- /util/image_pool.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | class ImagePool(): 6 | def __init__(self, pool_size): 7 | self.pool_size = pool_size 8 | if self.pool_size > 0: 9 | self.num_imgs = 0 10 | self.images = [] 11 | 12 | def query(self, images): 13 | if self.pool_size == 0: 14 | return images 15 | return_images = [] 16 | for image in images.data: 17 | image = torch.unsqueeze(image, 0) 18 | if self.num_imgs < self.pool_size: 19 | self.num_imgs = self.num_imgs + 1 20 | self.images.append(image) 21 | return_images.append(image) 22 | else: 23 | p = random.uniform(0, 1) 24 | if p > 0.5: 25 | random_id = random.randint(0, self.pool_size-1) 26 | tmp = self.images[random_id].clone() 27 | self.images[random_id] = image 28 | return_images.append(tmp) 29 | else: 30 | return_images.append(image) 31 | return_images = Variable(torch.cat(return_images, 0)) 32 | return return_images 33 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | weights = '../ilsvrc-nets/vgg16-fcn.caffemodel' 15 | base_net = 
caffe.Net('../ilsvrc-nets/vgg16fcn.prototxt', '../vgg16fc.caffemodel', 16 | caffe.TEST) 17 | 18 | # init 19 | caffe.set_device(int(sys.argv[1])) 20 | caffe.set_mode_gpu() 21 | 22 | solver = caffe.SGDSolver('solver.prototxt') 23 | surgery.transplant(solver.net, base_net) 24 | 25 | # surgeries 26 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 27 | surgery.interp(solver.net, interp_layers) 28 | 29 | solver.net.params['conv1_1_bgrd'][0].data[:, :3] = base_net.params['conv1_1'][0].data 30 | solver.net.params['conv1_1_bgrd'][0].data[:, 3] = np.mean(base_net.params['conv1_1'][0].data, axis=1) 31 | solver.net.params['conv1_1_bgrd'][1].data[...] = base_net.params['conv1_1'][1].data 32 | 33 | del base_net 34 | 35 | # scoring 36 | test = np.loadtxt('../data/nyud/test.txt', dtype=str) 37 | 38 | for _ in range(50): 39 | solver.step(2000) 40 | score.seg_tests(solver, False, test, layer='score') 41 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solve.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import surgery, score 3 | 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | try: 9 | import setproctitle 10 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 11 | except: 12 | pass 13 | 14 | color_proto = '../nyud-rgb-32s/trainval.prototxt' 15 | color_weights = '../nyud-rgb-32s/nyud-rgb-32s-28k.caffemodel' 16 | hha_proto = '../nyud-hha-32s/trainval.prototxt' 17 | hha_weights = '../nyud-hha-32s/nyud-hha-32s-60k.caffemodel' 18 | 19 | # init 20 | caffe.set_device(int(sys.argv[1])) 21 | caffe.set_mode_gpu() 22 | 23 | solver = caffe.SGDSolver('solver.prototxt') 24 | 25 | # surgeries 26 | color_net = caffe.Net(color_proto, color_weights, caffe.TEST) 27 | surgery.transplant(solver.net, color_net, suffix='color') 28 | del color_net 29 | 30 | hha_net = caffe.Net(hha_proto, hha_weights, caffe.TEST) 31 | surgery.transplant(solver.net, hha_net, suffix='hha') 32 | del hha_net 33 | 34 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 35 | surgery.interp(solver.net, interp_layers) 36 | 37 | # scoring 38 | test = np.loadtxt('../data/nyud/test.txt', dtype=str) 39 | 40 | for _ in range(50): 41 | solver.step(2000) 42 | score.seg_tests(solver, False, test, layer='score') 43 | -------------------------------------------------------------------------------- /torchsrc/utils/image_pool.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | from pdb import set_trace as st 5 | from torch.autograd import Variable 6 | class ImagePool(): 7 | def __init__(self, pool_size): 8 | self.pool_size = pool_size 9 | if self.pool_size > 0: 10 | self.num_imgs = 0 11 | self.images = [] 12 | 13 | def query(self, images): 14 | if self.pool_size == 0: 15 | return images 16 | return_images = [] 17 | for image in images.data: 18 | image = torch.unsqueeze(image, 0) 19 | if self.num_imgs < self.pool_size: 20 | self.num_imgs = self.num_imgs + 1 21 | self.images.append(image) 22 | return_images.append(image) 23 | else: 24 | p = random.uniform(0, 1) 25 | if p > 0.5: 26 | random_id = random.randint(0, self.pool_size-1) 27 | tmp = self.images[random_id].clone() 28 | self.images[random_id] = image 29 | return_images.append(tmp) 30 | else: 31 | return_images.append(image) 32 | return_images = Variable(torch.cat(return_images, 0)) 33 | return return_images 34 | 
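# Minimal usage sketch (illustrative only): for roughly half of the queried
# images the pool returns an older generated image from its buffer, which is
# the role of the image buffer configured by --pool_size in the training
# options; `fake` below is just a random stand-in for generator output.
if __name__ == '__main__':
    pool = ImagePool(pool_size=50)
    for _ in range(4):
        fake = Variable(torch.randn(2, 3, 8, 8))  # stand-in for a batch of fakes
        mixed = pool.query(fake)                  # current fakes mixed with history
        assert mixed.size() == fake.size()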
-------------------------------------------------------------------------------- /torchsrc/datasets/apc/v3.py: -------------------------------------------------------------------------------- 1 | from base import APC2016Base 2 | from jsk import APC2016jsk 3 | from mit_benchmark import APC2016mit_benchmark 4 | from mit_training import APC2016mit_training 5 | from rbo import APC2016rbo 6 | 7 | 8 | class APC2016V3(APC2016Base): 9 | 10 | def __init__(self, split, transform=False): 11 | if split == 'train': 12 | self.datasets = [ 13 | APC2016mit_training(transform), 14 | APC2016jsk('all', transform), 15 | APC2016rbo('all', transform), 16 | ] 17 | elif split == 'valid': 18 | self.datasets = [ 19 | APC2016mit_benchmark('all', transform), 20 | ] 21 | else: 22 | raise ValueError('Unsupported split: %s' % split) 23 | 24 | def __len__(self): 25 | return sum(len(d) for d in self.datasets) 26 | 27 | @property 28 | def split(self): 29 | raise RuntimeError('Not supported.') 30 | 31 | @split.setter 32 | def split(self, value): 33 | raise RuntimeError('Not supported.') 34 | 35 | def __getitem__(self, index): 36 | skipped = 0 37 | for dataset in self.datasets: 38 | current_index = index - skipped 39 | if current_index < len(dataset): 40 | return dataset[current_index] 41 | skipped += len(dataset) 42 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/pascal/README.md: -------------------------------------------------------------------------------- 1 | # PASCAL VOC and SBD 2 | 3 | PASCAL VOC is a standard recognition dataset and benchmark with detection and semantic segmentation challenges. 4 | The semantic segmentation challenge annotates 20 object classes and background. 5 | The Semantic Boundary Dataset (SBD) is a further annotation of the PASCAL VOC data that provides more semantic segmentation and instance segmentation masks. 6 | 7 | PASCAL VOC has a private test set and [leaderboard for semantic segmentation](http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6). 8 | 9 | The train/val/test splits of PASCAL VOC segmentation challenge and SBD diverge. 10 | Most notably VOC 2011 segval intersects with SBD train. 11 | Care must be taken for proper evaluation by excluding images from the train or val splits. 12 | 13 | We train on the 8,498 images of SBD train. 14 | We validate on the non-intersecting set defined in the included `seg11valid.txt`. 15 | 16 | Refer to `classes.txt` for the listing of classes in model output order. 17 | Refer to `../voc_layers.py` for the Python data layer for this dataset. 
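As a rough sketch of how such a non-intersecting validation list can be produced (the file paths below are placeholders, and the shipped `seg11valid.txt` remains the authoritative list), the bookkeeping is a set difference between the VOC 2011 segval IDs and the SBD train IDs:

```python
# Illustrative only: drop any segval image that also appears in SBD train.
with open('sbd_train.txt') as f:           # placeholder path to SBD train IDs
    sbd_train = set(line.strip() for line in f)
with open('voc2011_segval.txt') as f:      # placeholder path to VOC 2011 segval IDs
    segval = [line.strip() for line in f]

seg11valid = [name for name in segval if name not in sbd_train]
with open('seg11valid.txt', 'w') as f:
    f.write('\n'.join(seg11valid) + '\n')
```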
18 | 19 | See the dataset sites for download: 20 | 21 | - PASCAL VOC 2012: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/ 22 | - SBD: see [homepage](http://home.bharathh.info/home/sbd) or [direct download](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz) 23 | -------------------------------------------------------------------------------- /sublist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import h5py 4 | import random 5 | import linecache 6 | 7 | 8 | def mkdir(path): 9 | if not os.path.exists(path): 10 | os.makedirs(path) 11 | 12 | 13 | def dir2list(path,sub_list_file): 14 | if os.path.exists(sub_list_file): 15 | fp = open(sub_list_file, 'r') 16 | sublines = fp.readlines() 17 | sub_names = [] 18 | for subline in sublines: 19 | sub_info = subline.replace('\n', '') 20 | sub_names.append(sub_info) 21 | fp.close() 22 | return sub_names 23 | else: 24 | fp = open(sub_list_file, 'w') 25 | img_root_dir = os.path.join(path) 26 | subs = os.listdir(img_root_dir) 27 | subs.sort() 28 | for sub in subs: 29 | sub_dir = os.path.join(img_root_dir,sub) 30 | views = os.listdir(sub_dir) 31 | views.sort() 32 | for view in views: 33 | view_dir = os.path.join(sub_dir,view) 34 | slices = os.listdir(view_dir) 35 | slices.sort() 36 | for slice in slices: 37 | line = os.path.join(view_dir,slice) 38 | fp.write(line + "\n") 39 | fp.close() 40 | 41 | 42 | def equal_length_two_list(list_A, list_B): 43 | if len(list_A) 0: 48 | img = ImageOps.expand(img, border=self.padding, fill=0) 49 | 50 | w, h = img.size 51 | th, tw = self.size 52 | if w == tw and h == th: 53 | return img 54 | if i==0: 55 | x1 = random.randint(0, w - tw) 56 | y1 = random.randint(0, h - th) 57 | output.append(img.crop((x1, y1, x1 + tw, y1 + th))) 58 | return output -------------------------------------------------------------------------------- /data/custom_dataset_data_loader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from data.base_data_loader import BaseDataLoader 3 | 4 | 5 | def CreateDataset(opt): 6 | dataset = None 7 | if opt.dataset_mode == 'aligned': 8 | from data.aligned_dataset import AlignedDataset 9 | dataset = AlignedDataset() 10 | elif opt.dataset_mode == 'unaligned': 11 | from data.unaligned_dataset import UnalignedDataset 12 | dataset = UnalignedDataset() 13 | elif opt.dataset_mode == 'single': 14 | from data.single_dataset import SingleDataset 15 | dataset = SingleDataset() 16 | elif opt.dataset_mode == 'yh': 17 | from data.yh_dataset import yhDataset 18 | dataset = yhDataset() 19 | elif opt.dataset_mode == 'yh_seg': 20 | from data.yh_seg_dataset import yhSegDataset 21 | dataset = yhSegDataset() 22 | elif opt.dataset_mode == 'yh_seg_spleen': 23 | from data.yh_seg_spleenonly_dataset import yhSegDatasetSpleenOnly 24 | dataset = yhSegDatasetSpleenOnly() 25 | elif opt.dataset_mode == 'yh_test_seg': 26 | from data.yh_test_seg_dataset import yhTestSegDataset 27 | dataset = yhTestSegDataset() 28 | else: 29 | raise ValueError("Dataset [%s] not recognized." 
% opt.dataset_mode) 30 | 31 | print("dataset [%s] was created" % (dataset.name())) 32 | dataset.initialize(opt) 33 | return dataset 34 | 35 | 36 | class CustomDatasetDataLoader(BaseDataLoader): 37 | def name(self): 38 | return 'CustomDatasetDataLoader' 39 | 40 | def initialize(self, opt): 41 | BaseDataLoader.initialize(self, opt) 42 | self.dataset = CreateDataset(opt) 43 | self.dataloader = torch.utils.data.DataLoader( 44 | self.dataset, 45 | batch_size=opt.batchSize, 46 | shuffle=not opt.serial_batches, 47 | num_workers=int(opt.nThreads)) 48 | 49 | def load_data(self): 50 | return self.dataloader 51 | 52 | def __len__(self): 53 | return min(len(self.dataset), self.opt.max_dataset_size) 54 | -------------------------------------------------------------------------------- /data/aligned_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import random 3 | import torchvision.transforms as transforms 4 | import torch 5 | from data.base_dataset import BaseDataset 6 | from data.image_folder import make_dataset 7 | from PIL import Image 8 | 9 | 10 | class AlignedDataset(BaseDataset): 11 | def initialize(self, opt): 12 | self.opt = opt 13 | self.root = opt.dataroot 14 | self.dir_AB = os.path.join(opt.dataroot, opt.phase) 15 | 16 | self.AB_paths = sorted(make_dataset(self.dir_AB)) 17 | 18 | assert(opt.resize_or_crop == 'resize_and_crop') 19 | 20 | transform_list = [transforms.ToTensor(), 21 | transforms.Normalize((0.5, 0.5, 0.5), 22 | (0.5, 0.5, 0.5))] 23 | 24 | self.transform = transforms.Compose(transform_list) 25 | 26 | def __getitem__(self, index): 27 | AB_path = self.AB_paths[index] 28 | AB = Image.open(AB_path).convert('RGB') 29 | AB = AB.resize((self.opt.loadSize * 2, self.opt.loadSize), Image.BICUBIC) 30 | AB = self.transform(AB) 31 | 32 | w_total = AB.size(2) 33 | w = int(w_total / 2) 34 | h = AB.size(1) 35 | w_offset = random.randint(0, max(0, w - self.opt.fineSize - 1)) 36 | h_offset = random.randint(0, max(0, h - self.opt.fineSize - 1)) 37 | 38 | A = AB[:, h_offset:h_offset + self.opt.fineSize, 39 | w_offset:w_offset + self.opt.fineSize] 40 | B = AB[:, h_offset:h_offset + self.opt.fineSize, 41 | w + w_offset:w + w_offset + self.opt.fineSize] 42 | 43 | if (not self.opt.no_flip) and random.random() < 0.5: 44 | idx = [i for i in range(A.size(2) - 1, -1, -1)] 45 | idx = torch.LongTensor(idx) 46 | A = A.index_select(2, idx) 47 | B = B.index_select(2, idx) 48 | 49 | return {'A': A, 'B': B, 50 | 'A_paths': AB_path, 'B_paths': AB_path} 51 | 52 | def __len__(self): 53 | return len(self.AB_paths) 54 | 55 | def name(self): 56 | return 'AlignedDataset' 57 | -------------------------------------------------------------------------------- /models/test_seg_model.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | from collections import OrderedDict 3 | import util.util as util 4 | from .base_model import BaseModel 5 | from . 
import networks 6 | import torch 7 | 8 | class TestSegModel(BaseModel): 9 | def name(self): 10 | return 'TestModel' 11 | 12 | def initialize(self, opt): 13 | assert(not opt.isTrain) 14 | BaseModel.initialize(self, opt) 15 | self.input_A = self.Tensor(opt.batchSize, opt.input_nc, opt.fineSize, opt.fineSize) 16 | 17 | self.netG = networks.define_G(opt.input_nc, opt.output_nc, 18 | opt.ngf, opt.which_model_netG, 19 | opt.norm, not opt.no_dropout, 20 | self.gpu_ids) 21 | 22 | self.netG_seg = networks.define_G(opt.input_nc_seg, opt.output_nc_seg, 23 | opt.ngf, opt.which_model_netSeg, opt.norm, not opt.no_dropout, self.gpu_ids) 24 | 25 | 26 | 27 | which_epoch = opt.which_epoch 28 | self.load_network(self.netG, 'G_A', which_epoch) 29 | self.load_network(self.netG_seg, 'Seg_A', which_epoch) 30 | 31 | print('---------- Networks initialized -------------') 32 | # networks.print_network(self.netG) 33 | print('-----------------------------------------------') 34 | 35 | def set_input(self, input): 36 | # we need to use single_dataset mode 37 | input_A = input['A'] 38 | self.input_A.resize_(input_A.size()).copy_(input_A) 39 | self.image_paths = input['A_paths'] 40 | 41 | def test(self): 42 | self.real_A = Variable(self.input_A) 43 | self.fake_B = self.netG_seg.forward(self.real_A) 44 | 45 | # get image paths 46 | def get_image_paths(self): 47 | return self.image_paths 48 | 49 | def get_current_visuals(self): 50 | real_A = util.tensor2im(self.real_A.data) 51 | fake_B = util.tensor2seg(torch.max(self.fake_B.data,dim=1,keepdim=True)[1]) 52 | return OrderedDict([('real_A', real_A), ('fake_B', fake_B)]) 53 | -------------------------------------------------------------------------------- /util/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import * 3 | import os 4 | 5 | 6 | class HTML: 7 | def __init__(self, web_dir, title, reflesh=0): 8 | self.title = title 9 | self.web_dir = web_dir 10 | self.img_dir = os.path.join(self.web_dir, 'images') 11 | if not os.path.exists(self.web_dir): 12 | os.makedirs(self.web_dir) 13 | if not os.path.exists(self.img_dir): 14 | os.makedirs(self.img_dir) 15 | # print(self.img_dir) 16 | 17 | self.doc = dominate.document(title=title) 18 | if reflesh > 0: 19 | with self.doc.head: 20 | meta(http_equiv="reflesh", content=str(reflesh)) 21 | 22 | def get_image_dir(self): 23 | return self.img_dir 24 | 25 | def add_header(self, str): 26 | with self.doc: 27 | h3(str) 28 | 29 | def add_table(self, border=1): 30 | self.t = table(border=border, style="table-layout: fixed;") 31 | self.doc.add(self.t) 32 | 33 | def add_images(self, ims, txts, links, width=400): 34 | self.add_table() 35 | with self.t: 36 | with tr(): 37 | for im, txt, link in zip(ims, txts, links): 38 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 39 | with p(): 40 | with a(href=os.path.join('images', link)): 41 | img(style="width:%dpx" % width, src=os.path.join('images', im)) 42 | br() 43 | p(txt) 44 | 45 | def save(self): 46 | html_file = '%s/index.html' % self.web_dir 47 | f = open(html_file, 'wt') 48 | f.write(self.doc.render()) 49 | f.close() 50 | 51 | 52 | if __name__ == '__main__': 53 | html = HTML('web/', 'test_html') 54 | html.add_header('hello world') 55 | 56 | ims = [] 57 | txts = [] 58 | links = [] 59 | for n in range(4): 60 | ims.append('image_%d.png' % n) 61 | txts.append('text_%d' % n) 62 | links.append('image_%d.png' % n) 63 | html.add_images(ims, txts, links) 64 | html.save() 65 | 
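# A sketch of how this class can be paired with util.util.save_image to build
# a simple results page; `visuals` and `prefix` are hypothetical stand-ins for
# a model's get_current_visuals() output and an image-name prefix, and this
# helper is illustrative rather than part of the training/test scripts.
def save_images_page(web_dir, visuals, prefix, width=256):
    import util.util as util  # provides save_image(image_numpy, image_path)
    page = HTML(web_dir, 'results')
    ims, txts, links = [], [], []
    for label, image_numpy in visuals.items():
        img_name = '%s_%s.png' % (prefix, label)
        util.save_image(image_numpy, os.path.join(page.get_image_dir(), img_name))
        ims.append(img_name)
        txts.append(label)
        links.append(img_name)
    page.add_images(ims, txts, links, width=width)
    page.save()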
-------------------------------------------------------------------------------- /data/image_folder.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Code from 3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py 4 | # Modified the original code so that it also loads images from the current 5 | # directory as well as the subdirectories 6 | ############################################################################### 7 | 8 | import torch.utils.data as data 9 | 10 | from PIL import Image 11 | import os 12 | import os.path 13 | 14 | IMG_EXTENSIONS = [ 15 | '.jpg', '.JPG', '.jpeg', '.JPEG', 16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 17 | ] 18 | 19 | 20 | def is_image_file(filename): 21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 22 | 23 | 24 | def make_dataset(dir): 25 | images = [] 26 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 27 | 28 | for root, _, fnames in sorted(os.walk(dir)): 29 | for fname in fnames: 30 | if is_image_file(fname): 31 | path = os.path.join(root, fname) 32 | images.append(path) 33 | 34 | return images 35 | 36 | 37 | def default_loader(path): 38 | return Image.open(path).convert('RGB') 39 | 40 | 41 | class ImageFolder(data.Dataset): 42 | 43 | def __init__(self, root, transform=None, return_paths=False, 44 | loader=default_loader): 45 | imgs = make_dataset(root) 46 | if len(imgs) == 0: 47 | raise(RuntimeError("Found 0 images in: " + root + "\n" 48 | "Supported image extensions are: " + 49 | ",".join(IMG_EXTENSIONS))) 50 | 51 | self.root = root 52 | self.imgs = imgs 53 | self.transform = transform 54 | self.return_paths = return_paths 55 | self.loader = loader 56 | 57 | def __getitem__(self, index): 58 | path = self.imgs[index] 59 | img = self.loader(path) 60 | if self.transform is not None: 61 | img = self.transform(img) 62 | if self.return_paths: 63 | return img, path 64 | else: 65 | return img 66 | 67 | def __len__(self): 68 | return len(self.imgs) 69 | -------------------------------------------------------------------------------- /data/base_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import torchvision.transforms as transforms 4 | 5 | class BaseDataset(data.Dataset): 6 | def __init__(self): 7 | super(BaseDataset, self).__init__() 8 | 9 | def name(self): 10 | return 'BaseDataset' 11 | 12 | def initialize(self, opt): 13 | pass 14 | 15 | 16 | 17 | 18 | def get_transform(opt): 19 | transform_list = [] 20 | if opt.resize_or_crop == 'resize_and_crop': 21 | osize = [opt.loadSize, opt.loadSize] 22 | transform_list.append(transforms.Scale(osize, Image.BICUBIC)) 23 | transform_list.append(transforms.RandomCrop(opt.fineSize)) 24 | elif opt.resize_or_crop == 'crop': 25 | transform_list.append(transforms.RandomCrop(opt.fineSize)) 26 | elif opt.resize_or_crop == 'scale_width': 27 | transform_list.append(transforms.Lambda( 28 | lambda img: __scale_width(img, opt.fineSize))) 29 | elif opt.resize_or_crop == 'scale_width_and_crop': 30 | transform_list.append(transforms.Lambda( 31 | lambda img: __scale_width(img, opt.loadSize))) 32 | transform_list.append(transforms.RandomCrop(opt.fineSize)) 33 | elif opt.resize_or_crop == 'yh_test_resize': 34 | osize = [opt.fineSize, opt.fineSize] 35 | transform_list.append(transforms.Scale(osize, Image.BICUBIC)) 36 | # elif 
opt.resize_or_crop == 'resize': 37 | # osize = [opt.loadSize, opt.loadSize] 38 | # transform_list.append(transforms.Scale(osize, Image.BICUBIC)) 39 | # elif opt.resize_or_crop == 'random_crop': 40 | # transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize)) 41 | 42 | if opt.isTrain and not opt.no_flip: 43 | transform_list.append(transforms.RandomHorizontalFlip()) 44 | 45 | transform_list += [transforms.ToTensor(), 46 | transforms.Normalize((0.5, 0.5, 0.5), 47 | (0.5, 0.5, 0.5))] 48 | return transforms.Compose(transform_list) 49 | 50 | def __scale_width(img, target_width): 51 | ow, oh = img.size 52 | if (ow == target_width): 53 | return img 54 | w = target_width 55 | h = int(target_width * oh / ow) 56 | return img.resize((w, h), Image.BICUBIC) 57 | -------------------------------------------------------------------------------- /torchsrc/datasets/apc/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | from torch.utils import data 5 | 6 | 7 | class APC2016Base(data.Dataset): 8 | 9 | class_names = np.array([ 10 | 'background', 11 | 'barkely_hide_bones', 12 | 'cherokee_easy_tee_shirt', 13 | 'clorox_utility_brush', 14 | 'cloud_b_plush_bear', 15 | 'command_hooks', 16 | 'cool_shot_glue_sticks', 17 | 'crayola_24_ct', 18 | 'creativity_chenille_stems', 19 | 'dasani_water_bottle', 20 | 'dove_beauty_bar', 21 | 'dr_browns_bottle_brush', 22 | 'easter_turtle_sippy_cup', 23 | 'elmers_washable_no_run_school_glue', 24 | 'expo_dry_erase_board_eraser', 25 | 'fiskars_scissors_red', 26 | 'fitness_gear_3lb_dumbbell', 27 | 'folgers_classic_roast_coffee', 28 | 'hanes_tube_socks', 29 | 'i_am_a_bunny_book', 30 | 'jane_eyre_dvd', 31 | 'kleenex_paper_towels', 32 | 'kleenex_tissue_box', 33 | 'kyjen_squeakin_eggs_plush_puppies', 34 | 'laugh_out_loud_joke_book', 35 | 'oral_b_toothbrush_green', 36 | 'oral_b_toothbrush_red', 37 | 'peva_shower_curtain_liner', 38 | 'platinum_pets_dog_bowl', 39 | 'rawlings_baseball', 40 | 'rolodex_jumbo_pencil_cup', 41 | 'safety_first_outlet_plugs', 42 | 'scotch_bubble_mailer', 43 | 'scotch_duct_tape', 44 | 'soft_white_lightbulb', 45 | 'staples_index_cards', 46 | 'ticonderoga_12_pencils', 47 | 'up_glucose_bottle', 48 | 'womens_knit_gloves', 49 | 'woods_extension_cord', 50 | ]) 51 | mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) 52 | 53 | def transform(self, img, lbl): 54 | img = img[:, :, ::-1] # RGB -> BGR 55 | img = img.astype(np.float64) 56 | img -= self.mean_bgr 57 | img = img.transpose(2, 0, 1) 58 | img = torch.from_numpy(img).float() 59 | lbl = torch.from_numpy(lbl).long() 60 | return img, lbl 61 | 62 | def untransform(self, img, lbl): 63 | img = img.numpy() 64 | img = img.transpose(1, 2, 0) 65 | img += self.mean_bgr 66 | img = img.astype(np.uint8) 67 | img = img[:, :, ::-1] 68 | lbl = lbl.numpy() 69 | return img, lbl 70 | -------------------------------------------------------------------------------- /torchsrc/datasets/apc/rbo.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import os.path as osp 4 | import re 5 | 6 | import numpy as np 7 | import scipy.misc 8 | from sklearn.model_selection import train_test_split 9 | 10 | from base import APC2016Base 11 | 12 | 13 | class APC2016rbo(APC2016Base): 14 | 15 | def __init__(self, split='train', transform=False): 16 | assert split in ['train', 'valid', 'all'] 17 | self.split = split 18 | self._transform = transform 19 | self.dataset_dir = 
osp.expanduser('~/data/datasets/APC2016/APC2016rbo') 20 | data_ids = self._get_ids() 21 | ids_train, ids_valid = train_test_split( 22 | data_ids, test_size=0.25, random_state=1234) 23 | self._ids = {'train': ids_train, 'valid': ids_valid, 'all': data_ids} 24 | 25 | def __len__(self): 26 | return len(self._ids[self.split]) 27 | 28 | def _get_ids(self): 29 | ids = [] 30 | for img_file in os.listdir(self.dataset_dir): 31 | if not re.match(r'^.*_[0-9]*_bin_[a-l].jpg$', img_file): 32 | continue 33 | data_id = osp.splitext(img_file)[0] 34 | ids.append(data_id) 35 | return ids 36 | 37 | def _load_from_id(self, data_id): 38 | img_file = osp.join(self.dataset_dir, data_id + '.jpg') 39 | img = scipy.misc.imread(img_file) 40 | # generate label from mask files 41 | lbl = np.zeros(img.shape[:2], dtype=np.int32) 42 | # shelf bin mask file 43 | shelf_bin_mask_file = osp.join(self.dataset_dir, data_id + '.pbm') 44 | shelf_bin_mask = scipy.misc.imread(shelf_bin_mask_file, mode='L') 45 | lbl[shelf_bin_mask < 127] = -1 46 | # object mask files 47 | mask_glob = osp.join(self.dataset_dir, data_id + '_*.pbm') 48 | for mask_file in glob.glob(mask_glob): 49 | mask_id = osp.splitext(osp.basename(mask_file))[0] 50 | mask = scipy.misc.imread(mask_file, mode='L') 51 | lbl_name = mask_id[len(data_id + '_'):] 52 | lbl_id = np.where(self.class_names == lbl_name)[0] 53 | lbl[mask > 127] = lbl_id 54 | return img, lbl 55 | 56 | def __getitem__(self, index): 57 | data_id = self._ids[self.split][index] 58 | img, lbl = self._load_from_id(data_id) 59 | if self._transform: 60 | return self.transform(img, lbl) 61 | else: 62 | return img, lbl 63 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/score.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import caffe 3 | import numpy as np 4 | import os 5 | import sys 6 | from datetime import datetime 7 | from PIL import Image 8 | 9 | def fast_hist(a, b, n): 10 | k = (a >= 0) & (a < n) 11 | return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n) 12 | 13 | def compute_hist(net, save_dir, dataset, layer='score', gt='label'): 14 | n_cl = net.blobs[layer].channels 15 | if save_dir: 16 | os.mkdir(save_dir) 17 | hist = np.zeros((n_cl, n_cl)) 18 | loss = 0 19 | for idx in dataset: 20 | net.forward() 21 | hist += fast_hist(net.blobs[gt].data[0, 0].flatten(), 22 | net.blobs[layer].data[0].argmax(0).flatten(), 23 | n_cl) 24 | 25 | if save_dir: 26 | im = Image.fromarray(net.blobs[layer].data[0].argmax(0).astype(np.uint8), mode='P') 27 | im.save(os.path.join(save_dir, idx + '.png')) 28 | # compute the loss as well 29 | loss += net.blobs['loss'].data.flat[0] 30 | return hist, loss / len(dataset) 31 | 32 | def seg_tests(solver, save_format, dataset, layer='score', gt='label'): 33 | print '>>>', datetime.now(), 'Begin seg tests' 34 | solver.test_nets[0].share_with(solver.net) 35 | do_seg_tests(solver.test_nets[0], solver.iter, save_format, dataset, layer, gt) 36 | 37 | def do_seg_tests(net, iter, save_format, dataset, layer='score', gt='label'): 38 | n_cl = net.blobs[layer].channels 39 | if save_format: 40 | save_format = save_format.format(iter) 41 | hist, loss = compute_hist(net, save_format, dataset, layer, gt) 42 | # mean loss 43 | print '>>>', datetime.now(), 'Iteration', iter, 'loss', loss 44 | # overall accuracy 45 | acc = np.diag(hist).sum() / hist.sum() 46 | print '>>>', datetime.now(), 'Iteration', iter, 'overall accuracy', 
acc 47 | # per-class accuracy 48 | acc = np.diag(hist) / hist.sum(1) 49 | print '>>>', datetime.now(), 'Iteration', iter, 'mean accuracy', np.nanmean(acc) 50 | # per-class IU 51 | iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) 52 | print '>>>', datetime.now(), 'Iteration', iter, 'mean IU', np.nanmean(iu) 53 | freq = hist.sum(1) / hist.sum() 54 | print '>>>', datetime.now(), 'Iteration', iter, 'fwavacc', \ 55 | (freq[freq > 0] * iu[freq > 0]).sum() 56 | return hist 57 | -------------------------------------------------------------------------------- /torchsrc/utils/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | import inspect, re 6 | import numpy as np 7 | import os 8 | import collections 9 | 10 | # Converts a Tensor into a Numpy array 11 | # |imtype|: the desired type of the converted numpy array 12 | def tensor2im(image_tensor, imtype=np.uint8): 13 | image_numpy = image_tensor[0].cpu().float().numpy() 14 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 15 | return image_numpy.astype(imtype) 16 | 17 | 18 | def diagnose_network(net, name='network'): 19 | mean = 0.0 20 | count = 0 21 | for param in net.parameters(): 22 | if param.grad is not None: 23 | mean += torch.mean(torch.abs(param.grad.data)) 24 | count += 1 25 | if count > 0: 26 | mean = mean / count 27 | print(name) 28 | print(mean) 29 | 30 | 31 | def save_image(image_numpy, image_path): 32 | image_pil = Image.fromarray(image_numpy) 33 | image_pil.save(image_path) 34 | 35 | def info(object, spacing=10, collapse=1): 36 | """Print methods and doc strings. 37 | Takes module, class, list, dictionary, or string.""" 38 | methodList = [e for e in dir(object) if isinstance(getattr(object, e), collections.Callable)] 39 | processFunc = collapse and (lambda s: " ".join(s.split())) or (lambda s: s) 40 | print( "\n".join(["%s %s" % 41 | (method.ljust(spacing), 42 | processFunc(str(getattr(object, method).__doc__))) 43 | for method in methodList]) ) 44 | 45 | def varname(p): 46 | for line in inspect.getframeinfo(inspect.currentframe().f_back)[3]: 47 | m = re.search(r'\bvarname\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)', line) 48 | if m: 49 | return m.group(1) 50 | 51 | def print_numpy(x, val=True, shp=False): 52 | x = x.astype(np.float64) 53 | if shp: 54 | print('shape,', x.shape) 55 | if val: 56 | x = x.flatten() 57 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( 58 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) 59 | 60 | 61 | def mkdirs(paths): 62 | if isinstance(paths, list) and not isinstance(paths, str): 63 | for path in paths: 64 | mkdir(path) 65 | else: 66 | mkdir(paths) 67 | 68 | 69 | def mkdir(path): 70 | if not os.path.exists(path): 71 | os.makedirs(path) 72 | -------------------------------------------------------------------------------- /options/train_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class TrainOptions(BaseOptions): 5 | def initialize(self): 6 | BaseOptions.initialize(self) 7 | self.parser.add_argument('--display_freq', type=int, default=100, help='frequency of showing training results on screen') 8 | self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') 9 | self.parser.add_argument('--save_latest_freq', type=int, 
default=5000, help='frequency of saving the latest results') 10 | self.parser.add_argument('--save_epoch_freq', type=int, default=5, help='frequency of saving checkpoints at the end of epochs') 11 | self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') 12 | self.parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by , +, ...') 13 | self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') 14 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') 15 | self.parser.add_argument('--niter', type=int, default=100, help='# of iter at starting learning rate') 16 | self.parser.add_argument('--niter_decay', type=int, default=100, help='# of iter to linearly decay learning rate to zero') 17 | self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') 18 | self.parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam') 19 | self.parser.add_argument('--seg_lr', type=float, default=0.0001, help='initial learning rate for adam for segmentation') 20 | self.parser.add_argument('--no_lsgan', action='store_true', help='do *not* use least square GAN, if false, use vanilla GAN') 21 | self.parser.add_argument('--lambda_A', type=float, default=10.0, help='weight for cycle loss (A -> B -> A)') 22 | self.parser.add_argument('--lambda_B', type=float, default=10.0, help='weight for cycle loss (B -> A -> B)') 23 | self.parser.add_argument('--pool_size', type=int, default=50, help='the size of image buffer that stores previously generated images') 24 | self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/') 25 | self.isTrain = True 26 | -------------------------------------------------------------------------------- /util/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | import inspect, re 6 | import numpy as np 7 | import os 8 | import collections 9 | 10 | # Converts a Tensor into a Numpy array 11 | # |imtype|: the desired type of the converted numpy array 12 | def tensor2im(image_tensor, imtype=np.uint8): 13 | image_numpy = image_tensor[0].cpu().float().numpy() 14 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 15 | return image_numpy.astype(imtype) 16 | 17 | def tensor2seg(image_tensor, imtype=np.uint8): 18 | image_numpy = image_tensor[0].cpu().float().numpy() 19 | image_numpy = np.transpose(image_numpy, (1, 2, 0)) *20 20 | return image_numpy.astype(imtype) 21 | 22 | def diagnose_network(net, name='network'): 23 | mean = 0.0 24 | count = 0 25 | for param in net.parameters(): 26 | if param.grad is not None: 27 | mean += torch.mean(torch.abs(param.grad.data)) 28 | count += 1 29 | if count > 0: 30 | mean = mean / count 31 | print(name) 32 | print(mean) 33 | 34 | 35 | def save_image(image_numpy, image_path): 36 | if (len(image_numpy.shape)>2): 37 | image_pil = Image.fromarray(image_numpy[:,:,0]) 38 | else: 39 | image_pil = Image.fromarray(image_numpy) 40 | image_pil.save(image_path) 41 | 42 | def info(object, spacing=10, collapse=1): 43 | """Print methods and doc strings. 
44 | Takes module, class, list, dictionary, or string.""" 45 | methodList = [e for e in dir(object) if isinstance(getattr(object, e), collections.Callable)] 46 | processFunc = collapse and (lambda s: " ".join(s.split())) or (lambda s: s) 47 | print( "\n".join(["%s %s" % 48 | (method.ljust(spacing), 49 | processFunc(str(getattr(object, method).__doc__))) 50 | for method in methodList]) ) 51 | 52 | def varname(p): 53 | for line in inspect.getframeinfo(inspect.currentframe().f_back)[3]: 54 | m = re.search(r'\bvarname\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)', line) 55 | if m: 56 | return m.group(1) 57 | 58 | def print_numpy(x, val=True, shp=False): 59 | x = x.astype(np.float64) 60 | if shp: 61 | print('shape,', x.shape) 62 | if val: 63 | x = x.flatten() 64 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( 65 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) 66 | 67 | 68 | def mkdirs(paths): 69 | if isinstance(paths, list) and not isinstance(paths, str): 70 | for path in paths: 71 | mkdir(path) 72 | else: 73 | mkdir(paths) 74 | 75 | 76 | def mkdir(path): 77 | if not os.path.exists(path): 78 | os.makedirs(path) 79 | -------------------------------------------------------------------------------- /torchsrc/datasets/apc/mit_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | 4 | import numpy as np 5 | import skimage.io 6 | import yaml 7 | 8 | from base import APC2016Base 9 | 10 | 11 | here = osp.dirname(osp.abspath(__file__)) 12 | 13 | 14 | class APC2016mit_training(APC2016Base): 15 | 16 | dataset_dir = osp.expanduser('~/data/datasets/APC2016/training') 17 | 18 | def __init__(self, transform=False): 19 | self._transform = transform 20 | # drop by blacklist 21 | self._ids = [] 22 | with open(osp.join(here, 'data/mit_training_blacklist.yaml')) as f: 23 | blacklist = yaml.load(f) 24 | for index, data_id in enumerate(self._get_ids()): 25 | if index in blacklist: 26 | print('WARNING: skipping index=%d data' % index) 27 | continue 28 | self._ids.append(data_id) 29 | 30 | def __len__(self): 31 | return len(self._ids) 32 | 33 | @classmethod 34 | def _get_ids(cls): 35 | for loc in ['shelf', 'tote']: 36 | loc_dir = osp.join(cls.dataset_dir, loc) 37 | for cls_id, cls_name in enumerate(cls.class_names): 38 | if cls_id == 0: # background 39 | continue 40 | cls_dir = osp.join(loc_dir, cls_name) 41 | scene_dir_empty = osp.join(cls_dir, 'scene-empty') 42 | for scene_dir in os.listdir(cls_dir): 43 | scene_dir = osp.join(cls_dir, scene_dir) 44 | for frame_id in xrange(0, 18): 45 | empty_file = osp.join( 46 | scene_dir_empty, 'frame-%06d.color.png' % frame_id) 47 | rgb_file = osp.join( 48 | scene_dir, 'frame-%06d.color.png' % frame_id) 49 | mask_file = osp.join( 50 | scene_dir, 'masks', 51 | 'frame-%06d.mask.png' % frame_id) 52 | if osp.exists(rgb_file) and osp.exists(mask_file): 53 | yield empty_file, rgb_file, mask_file, cls_id 54 | 55 | @staticmethod 56 | def _load_from_id(data_id): 57 | empty_file, rgb_file, mask_file, cls_id = data_id 58 | img = skimage.io.imread(rgb_file) 59 | img_empty = skimage.io.imread(empty_file) 60 | mask = skimage.io.imread(mask_file, as_grey=True) >= 0.5 61 | lbl = np.zeros(mask.shape, dtype=np.int32) 62 | lbl[mask] = cls_id 63 | img_empty[mask] = img[mask] 64 | return img_empty, lbl 65 | 66 | def __getitem__(self, index): 67 | data_id = self._ids[index] 68 | img, lbl = self._load_from_id(data_id) 69 | if self._transform: 70 | return self.transform(img, lbl) 71 | 
else: 72 | return img, lbl 73 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/net.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../../python') 3 | 4 | import caffe 5 | from caffe import layers as L, params as P 6 | from caffe.coord_map import crop 7 | 8 | def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1): 9 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 10 | num_output=nout, pad=pad, group=group) 11 | return conv, L.ReLU(conv, in_place=True) 12 | 13 | def max_pool(bottom, ks, stride=1): 14 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 15 | 16 | def fcn(split): 17 | n = caffe.NetSpec() 18 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), 19 | seed=1337) 20 | if split == 'train': 21 | pydata_params['sbdd_dir'] = '../data/sbdd/dataset' 22 | pylayer = 'SBDDSegDataLayer' 23 | else: 24 | pydata_params['voc_dir'] = '../data/pascal/VOC2011' 25 | pylayer = 'VOCSegDataLayer' 26 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer, 27 | ntop=2, param_str=str(pydata_params)) 28 | 29 | # the base net 30 | n.conv1, n.relu1 = conv_relu(n.data, 11, 96, stride=4, pad=100) 31 | n.pool1 = max_pool(n.relu1, 3, stride=2) 32 | n.norm1 = L.LRN(n.pool1, local_size=5, alpha=1e-4, beta=0.75) 33 | n.conv2, n.relu2 = conv_relu(n.norm1, 5, 256, pad=2, group=2) 34 | n.pool2 = max_pool(n.relu2, 3, stride=2) 35 | n.norm2 = L.LRN(n.pool2, local_size=5, alpha=1e-4, beta=0.75) 36 | n.conv3, n.relu3 = conv_relu(n.norm2, 3, 384, pad=1) 37 | n.conv4, n.relu4 = conv_relu(n.relu3, 3, 384, pad=1, group=2) 38 | n.conv5, n.relu5 = conv_relu(n.relu4, 3, 256, pad=1, group=2) 39 | n.pool5 = max_pool(n.relu5, 3, stride=2) 40 | 41 | # fully conv 42 | n.fc6, n.relu6 = conv_relu(n.pool5, 6, 4096) 43 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 44 | n.fc7, n.relu7 = conv_relu(n.drop6, 1, 4096) 45 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 46 | 47 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0, 48 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 49 | n.upscore = L.Deconvolution(n.score_fr, 50 | convolution_param=dict(num_output=21, kernel_size=63, stride=32, 51 | bias_term=False), 52 | param=[dict(lr_mult=0)]) 53 | n.score = crop(n.upscore, n.data) 54 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 55 | loss_param=dict(normalize=True, ignore_label=255)) 56 | 57 | return n.to_proto() 58 | 59 | def make_net(): 60 | with open('train.prototxt', 'w') as f: 61 | f.write(str(fcn('train'))) 62 | 63 | with open('val.prototxt', 'w') as f: 64 | f.write(str(fcn('seg11valid'))) 65 | 66 | if __name__ == '__main__': 67 | make_net() 68 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/surgery.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import caffe 3 | import numpy as np 4 | 5 | def transplant(new_net, net, suffix=''): 6 | """ 7 | Transfer weights by copying matching parameters, coercing parameters of 8 | incompatible shape, and dropping unmatched parameters. 9 | 10 | The coercion is useful to convert fully connected layers to their 11 | equivalent convolutional layers, since the weights are the same and only 12 | the shapes are different. 
In particular, equivalent fully connected and 13 | convolution layers have shapes O x I and O x I x H x W respectively for O 14 | outputs channels, I input channels, H kernel height, and W kernel width. 15 | 16 | Both `net` to `new_net` arguments must be instantiated `caffe.Net`s. 17 | """ 18 | for p in net.params: 19 | p_new = p + suffix 20 | if p_new not in new_net.params: 21 | print 'dropping', p 22 | continue 23 | for i in range(len(net.params[p])): 24 | if i > (len(new_net.params[p_new]) - 1): 25 | print 'dropping', p, i 26 | break 27 | if net.params[p][i].data.shape != new_net.params[p_new][i].data.shape: 28 | print 'coercing', p, i, 'from', net.params[p][i].data.shape, 'to', new_net.params[p_new][i].data.shape 29 | else: 30 | print 'copying', p, ' -> ', p_new, i 31 | new_net.params[p_new][i].data.flat = net.params[p][i].data.flat 32 | 33 | def upsample_filt(size): 34 | """ 35 | Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size. 36 | """ 37 | factor = (size + 1) // 2 38 | if size % 2 == 1: 39 | center = factor - 1 40 | else: 41 | center = factor - 0.5 42 | og = np.ogrid[:size, :size] 43 | return (1 - abs(og[0] - center) / factor) * \ 44 | (1 - abs(og[1] - center) / factor) 45 | 46 | def interp(net, layers): 47 | """ 48 | Set weights of each layer in layers to bilinear kernels for interpolation. 49 | """ 50 | for l in layers: 51 | m, k, h, w = net.params[l][0].data.shape 52 | if m != k and k != 1: 53 | print 'input + output channels need to be the same or |output| == 1' 54 | raise 55 | if h != w: 56 | print 'filters need to be square' 57 | raise 58 | filt = upsample_filt(h) 59 | net.params[l][0].data[range(m), range(k), :, :] = filt 60 | 61 | def expand_score(new_net, new_layer, net, layer): 62 | """ 63 | Transplant an old score layer's parameters, with k < k' classes, into a new 64 | score layer with k classes s.t. the first k' are the old classes. 65 | """ 66 | old_cl = net.params[layer][0].num 67 | new_net.params[new_layer][0].data[:old_cl][...] = net.params[layer][0].data 68 | new_net.params[new_layer][1].data[0,0,0,:old_cl][...] 
= net.params[layer][1].data 69 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split, tops): 15 | n = caffe.NetSpec() 16 | n.data, n.label = L.Python(module='nyud_layers', 17 | layer='NYUDSegDataLayer', ntop=2, 18 | param_str=str(dict(nyud_dir='../data/nyud', split=split, 19 | tops=tops, seed=1337))) 20 | 21 | # the base net 22 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 23 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 24 | n.pool1 = max_pool(n.relu1_2) 25 | 26 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 27 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 28 | n.pool2 = max_pool(n.relu2_2) 29 | 30 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 31 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 32 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 33 | n.pool3 = max_pool(n.relu3_3) 34 | 35 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 36 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 37 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 38 | n.pool4 = max_pool(n.relu4_3) 39 | 40 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 41 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 42 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 43 | n.pool5 = max_pool(n.relu5_3) 44 | 45 | # fully conv 46 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 47 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 48 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 49 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 50 | 51 | n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0, 52 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 53 | n.upscore = L.Deconvolution(n.score_fr, 54 | convolution_param=dict(num_output=40, kernel_size=64, stride=32, 55 | bias_term=False), 56 | param=[dict(lr_mult=0)]) 57 | n.score = crop(n.upscore, n.data) 58 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 59 | loss_param=dict(normalize=False, ignore_label=255)) 60 | 61 | return n.to_proto() 62 | 63 | def make_net(): 64 | tops = ['hha', 'label'] 65 | with open('trainval.prototxt', 'w') as f: 66 | f.write(str(fcn('trainval', tops))) 67 | 68 | with open('test.prototxt', 'w') as f: 69 | f.write(str(fcn('test', tops))) 70 | 71 | if __name__ == '__main__': 72 | make_net() 73 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return 
conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split, tops): 15 | n = caffe.NetSpec() 16 | n.data, n.label = L.Python(module='nyud_layers', 17 | layer='NYUDSegDataLayer', ntop=2, 18 | param_str=str(dict(nyud_dir='../data/nyud', split=split, 19 | tops=tops, seed=1337))) 20 | 21 | # the base net 22 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 23 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 24 | n.pool1 = max_pool(n.relu1_2) 25 | 26 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 27 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 28 | n.pool2 = max_pool(n.relu2_2) 29 | 30 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 31 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 32 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 33 | n.pool3 = max_pool(n.relu3_3) 34 | 35 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 36 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 37 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 38 | n.pool4 = max_pool(n.relu4_3) 39 | 40 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 41 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 42 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 43 | n.pool5 = max_pool(n.relu5_3) 44 | 45 | # fully conv 46 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 47 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 48 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 49 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 50 | 51 | n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0, 52 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 53 | n.upscore = L.Deconvolution(n.score_fr, 54 | convolution_param=dict(num_output=40, kernel_size=64, stride=32, 55 | bias_term=False), 56 | param=[dict(lr_mult=0)]) 57 | n.score = crop(n.upscore, n.data) 58 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 59 | loss_param=dict(normalize=False, ignore_label=255)) 60 | 61 | return n.to_proto() 62 | 63 | def make_net(): 64 | tops = ['color', 'label'] 65 | with open('trainval.prototxt', 'w') as f: 66 | f.write(str(fcn('trainval', tops))) 67 | 68 | with open('test.prototxt', 'w') as f: 69 | f.write(str(fcn('test', tops))) 70 | 71 | if __name__ == '__main__': 72 | make_net() 73 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split): 15 | n = caffe.NetSpec() 16 | n.data, n.label = L.Python(module='pascalcontext_layers', 17 | layer='PASCALContextSegDataLayer', ntop=2, 18 | param_str=str(dict(voc_dir='../../data/pascal', 19 | context_dir='../../data/pascal-context', split=split, 20 | seed=1337))) 21 | 22 | # the base net 23 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 24 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 25 
| n.pool1 = max_pool(n.relu1_2) 26 | 27 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 28 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 29 | n.pool2 = max_pool(n.relu2_2) 30 | 31 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 32 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 33 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 34 | n.pool3 = max_pool(n.relu3_3) 35 | 36 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 37 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 38 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 39 | n.pool4 = max_pool(n.relu4_3) 40 | 41 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 42 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 43 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 44 | n.pool5 = max_pool(n.relu5_3) 45 | 46 | # fully conv 47 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 48 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 49 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 50 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 51 | 52 | n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0, 53 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 54 | n.upscore = L.Deconvolution(n.score_fr, 55 | convolution_param=dict(num_output=60, kernel_size=64, stride=32, 56 | bias_term=False), 57 | param=[dict(lr_mult=0)]) 58 | n.score = crop(n.upscore, n.data) 59 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 60 | loss_param=dict(normalize=False, ignore_label=255)) 61 | 62 | 63 | return n.to_proto() 64 | 65 | def make_net(): 66 | with open('train.prototxt', 'w') as f: 67 | f.write(str(fcn('train'))) 68 | 69 | with open('val.prototxt', 'w') as f: 70 | f.write(str(fcn('val'))) 71 | 72 | if __name__ == '__main__': 73 | make_net() 74 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split, tops): 15 | n = caffe.NetSpec() 16 | n.color, n.depth, n.label = L.Python(module='nyud_layers', 17 | layer='NYUDSegDataLayer', ntop=3, 18 | param_str=str(dict(nyud_dir='../data/nyud', split=split, 19 | tops=tops, seed=1337))) 20 | n.data = L.Concat(n.color, n.depth) 21 | 22 | # the base net 23 | n.conv1_1_bgrd, n.relu1_1 = conv_relu(n.data, 64, pad=100) 24 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 25 | n.pool1 = max_pool(n.relu1_2) 26 | 27 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 28 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 29 | n.pool2 = max_pool(n.relu2_2) 30 | 31 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 32 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 33 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 34 | n.pool3 = max_pool(n.relu3_3) 35 | 36 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 37 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 38 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 39 | n.pool4 = max_pool(n.relu4_3) 40 | 41 
| n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 42 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 43 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 44 | n.pool5 = max_pool(n.relu5_3) 45 | 46 | # fully conv 47 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 48 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 49 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 50 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 51 | 52 | n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0, 53 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 54 | n.upscore = L.Deconvolution(n.score_fr, 55 | convolution_param=dict(num_output=40, kernel_size=64, stride=32, 56 | bias_term=False), 57 | param=[dict(lr_mult=0)]) 58 | n.score = crop(n.upscore, n.data) 59 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 60 | loss_param=dict(normalize=False, ignore_label=255)) 61 | 62 | return n.to_proto() 63 | 64 | def make_net(): 65 | tops = ['color', 'depth', 'label'] 66 | with open('trainval.prototxt', 'w') as f: 67 | f.write(str(fcn('trainval', tops))) 68 | 69 | with open('test.prototxt', 'w') as f: 70 | f.write(str(fcn('test', tops))) 71 | 72 | if __name__ == '__main__': 73 | make_net() 74 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split): 15 | n = caffe.NetSpec() 16 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), 17 | seed=1337) 18 | if split == 'train': 19 | pydata_params['sbdd_dir'] = '../data/sbdd/dataset' 20 | pylayer = 'SBDDSegDataLayer' 21 | else: 22 | pydata_params['voc_dir'] = '../data/pascal/VOC2011' 23 | pylayer = 'VOCSegDataLayer' 24 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer, 25 | ntop=2, param_str=str(pydata_params)) 26 | 27 | # the base net 28 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 29 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 30 | n.pool1 = max_pool(n.relu1_2) 31 | 32 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 33 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 34 | n.pool2 = max_pool(n.relu2_2) 35 | 36 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 37 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 38 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 39 | n.pool3 = max_pool(n.relu3_3) 40 | 41 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 42 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 43 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 44 | n.pool4 = max_pool(n.relu4_3) 45 | 46 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 47 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 48 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 49 | n.pool5 = max_pool(n.relu5_3) 50 | 51 | # fully conv 52 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 53 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 54 | n.fc7, n.relu7 
= conv_relu(n.drop6, 4096, ks=1, pad=0) 55 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 56 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0, 57 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 58 | n.upscore = L.Deconvolution(n.score_fr, 59 | convolution_param=dict(num_output=21, kernel_size=64, stride=32, 60 | bias_term=False), 61 | param=[dict(lr_mult=0)]) 62 | n.score = crop(n.upscore, n.data) 63 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 64 | loss_param=dict(normalize=False, ignore_label=255)) 65 | 66 | return n.to_proto() 67 | 68 | def make_net(): 69 | with open('train.prototxt', 'w') as f: 70 | f.write(str(fcn('train'))) 71 | 72 | with open('val.prototxt', 'w') as f: 73 | f.write(str(fcn('seg11valid'))) 74 | 75 | if __name__ == '__main__': 76 | make_net() 77 | -------------------------------------------------------------------------------- /LICENSE1: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Yuankai Huo 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | 26 | --------------------------- LICENSE FOR EssNet -------------------------------- 27 | BSD License 28 | 29 | For EssNet software 30 | Copyright (c) 2018, Yuankai Huo 31 | All rights reserved. 32 | 33 | Redistribution and use in source and binary forms, with or without 34 | modification, are permitted provided that the following conditions are met: 35 | 36 | * Redistributions of source code must retain the above copyright notice, this 37 | list of conditions and the following disclaimer. 38 | 39 | * Redistributions in binary form must reproduce the above copyright notice, 40 | this list of conditions and the following disclaimer in the documentation 41 | and/or other materials provided with the distribution. 42 | 43 | 44 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 45 | 46 | Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
47 | 48 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 49 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split): 15 | n = caffe.NetSpec() 16 | n.data, n.label = L.Python(module='pascalcontext_layers', 17 | layer='PASCALContextSegDataLayer', ntop=2, 18 | param_str=str(dict(voc_dir='../../data/pascal', 19 | context_dir='../../data/pascal-context', split=split, 20 | seed=1337))) 21 | 22 | # the base net 23 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 24 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 25 | n.pool1 = max_pool(n.relu1_2) 26 | 27 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 28 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 29 | n.pool2 = max_pool(n.relu2_2) 30 | 31 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 32 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 33 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 34 | n.pool3 = max_pool(n.relu3_3) 35 | 36 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 37 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 38 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 39 | n.pool4 = max_pool(n.relu4_3) 40 | 41 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 42 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 43 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 44 | n.pool5 = max_pool(n.relu5_3) 45 | 46 | # fully conv 47 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 48 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 49 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 50 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 51 | 52 | n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0, 53 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 54 | n.upscore2 = L.Deconvolution(n.score_fr, 55 | convolution_param=dict(num_output=60, kernel_size=4, stride=2, 56 | bias_term=False), 57 | param=[dict(lr_mult=0)]) 58 | 59 | n.score_pool4 = L.Convolution(n.pool4, num_output=60, kernel_size=1, pad=0, 60 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 61 | n.score_pool4c = crop(n.score_pool4, n.upscore2) 62 | n.fuse_pool4 = 
L.Eltwise(n.upscore2, n.score_pool4c, 63 | operation=P.Eltwise.SUM) 64 | n.upscore16 = L.Deconvolution(n.fuse_pool4, 65 | convolution_param=dict(num_output=60, kernel_size=32, stride=16, 66 | bias_term=False), 67 | param=[dict(lr_mult=0)]) 68 | 69 | n.score = crop(n.upscore16, n.data) 70 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 71 | loss_param=dict(normalize=False, ignore_label=255)) 72 | 73 | return n.to_proto() 74 | 75 | def make_net(): 76 | with open('train.prototxt', 'w') as f: 77 | f.write(str(fcn('train'))) 78 | 79 | with open('val.prototxt', 'w') as f: 80 | f.write(str(fcn('val'))) 81 | 82 | if __name__ == '__main__': 83 | make_net() 84 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split): 15 | n = caffe.NetSpec() 16 | n.data, n.sem, n.geo = L.Python(module='siftflow_layers', 17 | layer='SIFTFlowSegDataLayer', ntop=3, 18 | param_str=str(dict(siftflow_dir='../data/sift-flow', 19 | split=split, seed=1337))) 20 | 21 | # the base net 22 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 23 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 24 | n.pool1 = max_pool(n.relu1_2) 25 | 26 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 27 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 28 | n.pool2 = max_pool(n.relu2_2) 29 | 30 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 31 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 32 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 33 | n.pool3 = max_pool(n.relu3_3) 34 | 35 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 36 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 37 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 38 | n.pool4 = max_pool(n.relu4_3) 39 | 40 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 41 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 42 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 43 | n.pool5 = max_pool(n.relu5_3) 44 | 45 | # fully conv 46 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 47 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 48 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 49 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 50 | 51 | n.score_fr_sem = L.Convolution(n.drop7, num_output=33, kernel_size=1, pad=0, 52 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 53 | n.upscore_sem = L.Deconvolution(n.score_fr_sem, 54 | convolution_param=dict(num_output=33, kernel_size=64, stride=32, 55 | bias_term=False), 56 | param=[dict(lr_mult=0)]) 57 | n.score_sem = crop(n.upscore_sem, n.data) 58 | # loss to make score happy (o.w. 
loss_sem) 59 | n.loss = L.SoftmaxWithLoss(n.score_sem, n.sem, 60 | loss_param=dict(normalize=False, ignore_label=255)) 61 | 62 | n.score_fr_geo = L.Convolution(n.drop7, num_output=3, kernel_size=1, pad=0, 63 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 64 | n.upscore_geo = L.Deconvolution(n.score_fr_geo, 65 | convolution_param=dict(num_output=3, kernel_size=64, stride=32, 66 | bias_term=False), 67 | param=[dict(lr_mult=0)]) 68 | n.score_geo = crop(n.upscore_geo, n.data) 69 | n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo, 70 | loss_param=dict(normalize=False, ignore_label=255)) 71 | 72 | return n.to_proto() 73 | 74 | def make_net(): 75 | with open('trainval.prototxt', 'w') as f: 76 | f.write(str(fcn('trainval'))) 77 | 78 | with open('test.prototxt', 'w') as f: 79 | f.write(str(fcn('test'))) 80 | 81 | if __name__ == '__main__': 82 | make_net() 83 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split): 15 | n = caffe.NetSpec() 16 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), 17 | seed=1337) 18 | if split == 'train': 19 | pydata_params['sbdd_dir'] = '../../data/sbdd/dataset' 20 | pylayer = 'SBDDSegDataLayer' 21 | else: 22 | pydata_params['voc_dir'] = '../../data/pascal/VOC2011' 23 | pylayer = 'VOCSegDataLayer' 24 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer, 25 | ntop=2, param_str=str(pydata_params)) 26 | 27 | # the base net 28 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 29 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 30 | n.pool1 = max_pool(n.relu1_2) 31 | 32 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 33 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 34 | n.pool2 = max_pool(n.relu2_2) 35 | 36 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 37 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 38 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 39 | n.pool3 = max_pool(n.relu3_3) 40 | 41 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 42 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 43 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 44 | n.pool4 = max_pool(n.relu4_3) 45 | 46 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 47 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 48 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 49 | n.pool5 = max_pool(n.relu5_3) 50 | 51 | # fully conv 52 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 53 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 54 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 55 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 56 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0, 57 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 58 | n.upscore2 = L.Deconvolution(n.score_fr, 59 | convolution_param=dict(num_output=21, kernel_size=4, stride=2, 
60 | bias_term=False), 61 | param=[dict(lr_mult=0)]) 62 | 63 | n.score_pool4 = L.Convolution(n.pool4, num_output=21, kernel_size=1, pad=0, 64 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 65 | n.score_pool4c = crop(n.score_pool4, n.upscore2) 66 | n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c, 67 | operation=P.Eltwise.SUM) 68 | n.upscore16 = L.Deconvolution(n.fuse_pool4, 69 | convolution_param=dict(num_output=21, kernel_size=32, stride=16, 70 | bias_term=False), 71 | param=[dict(lr_mult=0)]) 72 | 73 | n.score = crop(n.upscore16, n.data) 74 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 75 | loss_param=dict(normalize=False, ignore_label=255)) 76 | 77 | return n.to_proto() 78 | 79 | def make_net(): 80 | with open('train.prototxt', 'w') as f: 81 | f.write(str(fcn('train'))) 82 | 83 | with open('val.prototxt', 'w') as f: 84 | f.write(str(fcn('seg11valid'))) 85 | 86 | if __name__ == '__main__': 87 | make_net() 88 | -------------------------------------------------------------------------------- /LICENSE2: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Jun-Yan Zhu and Taesung Park 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | 26 | --------------------------- LICENSE FOR pix2pix -------------------------------- 27 | BSD License 28 | 29 | For pix2pix software 30 | Copyright (c) 2016, Phillip Isola and Jun-Yan Zhu 31 | All rights reserved. 32 | 33 | Redistribution and use in source and binary forms, with or without 34 | modification, are permitted provided that the following conditions are met: 35 | 36 | * Redistributions of source code must retain the above copyright notice, this 37 | list of conditions and the following disclaimer. 38 | 39 | * Redistributions in binary form must reproduce the above copyright notice, 40 | this list of conditions and the following disclaimer in the documentation 41 | and/or other materials provided with the distribution. 42 | 43 | ----------------------------- LICENSE FOR DCGAN -------------------------------- 44 | BSD License 45 | 46 | For dcgan.torch software 47 | 48 | Copyright (c) 2015, Facebook, Inc. All rights reserved. 
49 | 50 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 51 | 52 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 53 | 54 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 55 | 56 | Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 57 | 58 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 59 | -------------------------------------------------------------------------------- /data/yh_seg_spleenonly_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | from data.base_dataset import BaseDataset, get_transform 4 | from PIL import Image 5 | import torch 6 | import random 7 | import random_crop_yh 8 | 9 | class yhSegDatasetSpleenOnly(BaseDataset): 10 | def initialize(self, opt): 11 | self.opt = opt 12 | self.root = opt.dataroot 13 | self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A') 14 | self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B') 15 | 16 | self.dir_A = opt.raw_MRI_dir 17 | self.dir_B = opt.raw_CT_dir 18 | self.dir_Seg = opt.raw_MRI_seg_dir 19 | 20 | self.A_paths = opt.imglist_MRI 21 | self.B_paths = opt.imglist_CT 22 | 23 | self.A_size = len(self.A_paths) 24 | self.B_size = len(self.B_paths) 25 | if not self.opt.isTrain: 26 | self.skipcrop = True 27 | else: 28 | self.skipcrop = False 29 | # self.transform = get_transform(opt) 30 | 31 | if self.skipcrop: 32 | osize = [opt.fineSize, opt.fineSize] 33 | else: 34 | osize = [opt.loadSize, opt.loadSize] 35 | transform_list = [] 36 | transform_list.append(transforms.Scale(osize, Image.BICUBIC)) 37 | self.transforms_scale = transforms.Compose(transform_list) 38 | 39 | transform_list = [] 40 | transform_list.append(transforms.Scale(osize, Image.NEAREST)) 41 | self.transforms_seg_scale = transforms.Compose(transform_list) 42 | 43 | transform_list = [] 44 | transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize)) 45 | self.transforms_crop = transforms.Compose(transform_list) 46 | 47 | transform_list = [] 48 | transform_list.append(transforms.ToTensor()) 49 | self.transforms_toTensor = transforms.Compose(transform_list) 50 | 51 | transform_list = [] 52 | transform_list.append(transforms.Normalize((0.5, 0.5, 0.5), 53 | (0.5, 0.5, 0.5))) 54 | self.transforms_normalize = transforms.Compose(transform_list) 55 | 56 | 57 | def __getitem__(self, index): 58 | index_A = index % self.A_size 59 | 
A_path = self.A_paths[index_A] 60 | Seg_path = A_path.replace(self.dir_A,self.dir_Seg) 61 | Seg_path = Seg_path.replace('_rawimg','_organlabel') 62 | 63 | index_B = random.randint(0, self.B_size - 1) 64 | B_path = self.B_paths[index_B] 65 | A_img = Image.open(A_path).convert('L') 66 | Seg_img = Image.open(Seg_path).convert('I') 67 | B_img = Image.open(B_path).convert('L') 68 | 69 | A_img = self.transforms_scale(A_img) 70 | B_img = self.transforms_scale(B_img) 71 | Seg_img = self.transforms_seg_scale(Seg_img) 72 | 73 | if not self.skipcrop: 74 | [A_img,Seg_img] = self.transforms_crop([A_img, Seg_img]) 75 | [B_img] = self.transforms_crop([B_img]) 76 | 77 | A_img = self.transforms_toTensor(A_img) 78 | B_img = self.transforms_toTensor(B_img) 79 | Seg_img = self.transforms_toTensor(Seg_img) 80 | 81 | A_img = self.transforms_normalize(A_img) 82 | B_img = self.transforms_normalize(B_img) 83 | 84 | Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize) 85 | Seg_imgs[0, :, :] = Seg_img == 0 86 | Seg_imgs[1, :, :] = Seg_img == 1 87 | 88 | 89 | return {'A': A_img, 'B': B_img, 'Seg': Seg_imgs, 'Seg_one': Seg_img, 90 | 'A_paths': A_path, 'B_paths': B_path, 'Seg_paths':Seg_path} 91 | 92 | 93 | def __len__(self): 94 | return max(self.A_size, self.B_size) 95 | 96 | def name(self): 97 | return 'UnalignedDataset' 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SynSeg-Net 2 | (End-to-end Synthesis and Segmentation Network) 3 | 4 | ## Adversarial Synthesis Learning Enables Segmentation Without Target Modality Ground Truth 5 | 6 | This is our ongoing PyTorch implementation of end-to-end synthesis and segmentation without target modality ground truth. 7 | The paper (ISBI 2018) can be found on [arXiv](https://arxiv.org/abs/1712.07695). 8 | The video can be found on [YouTube](https://youtu.be/LTTh1WWPZ1o). 9 | 10 | The code was written by [Yuankai Huo](https://sites.google.com/site/yuankaihuo/) and developed upon [CycleGAN Torch](https://github.com/junyanz/CycleGAN). 11 | 12 | 13 | 14 | 15 | 16 | 17 | If you use this code for your research, please cite: 18 | 19 | Yuankai Huo, Zhoubing Xu, Shunxing Bao, Albert Assad, Richard G. Abramson, Bennett A. Landman. [Adversarial Synthesis Learning Enables Segmentation Without Target Modality Ground Truth.](https://arxiv.org/abs/1712.07695) In [arXiv](https://arxiv.org/abs/1712.07695) (2017). 20 | 21 | or 22 | 23 | Yuankai Huo, Zhoubing Xu, Hyeonsoo Moon, Shunxing Bao, Albert Assad, Tamara K. Moyo, Michael R. Savona, Richard G. Abramson, and Bennett A. Landman. [SynSeg-Net: Synthetic Segmentation Without Target Modality Ground Truth.](https://arxiv.org/abs/1810.06498) IEEE Transactions on Medical Imaging (2018). 24 | 25 | 26 | 27 | ## Prerequisites 28 | - Linux or macOS 29 | - Python 2 30 | - CPU or NVIDIA GPU + CUDA CuDNN 31 | - PyTorch 0.2 32 | 33 | ## Training Data and Testing Data 34 | We used MRI and CT 2D slices (from the coronal view), as well as MRI segmentations, as training data.
35 | We used CT 2D slices (from the coronal view) as testing data. 36 | The data organization can be seen in the txt files in the `sublist` directory. 37 | 38 | ## Training 39 | - Train the model 40 | ```bash 41 | python train_yh.py --dataroot ./datasets/yh --name yh_cyclegan_imgandseg --batchSize 4 --model cycle_seg --pool_size 50 --no_dropout --yh_run_model Train --dataset_mode yh_seg --input_nc 1 --seg_norm CrossEntropy --output_nc 1 --output_nc_seg 7 --checkpoints_dir /home-local/Cycle_Deep/Checkpoints/ --test_seg_output_dir /home-local/Cycle_Deep/Output/ --display_id 0 42 | ``` 43 | - `--name` is the name of the experiment 44 | `--model` "cycle_seg" means EssNet 45 | `--yh_run_model` "Train" means do training 46 | `--output_nc_seg` defines the number of segmentation labels 47 | `--checkpoints_dir` the directory to save checkpoints (models) 48 | `--test_seg_output_dir` the directory to save the test segmentations 49 | 50 | ## Testing 51 | - Test the synthesis 52 | ```bash 53 | python train_yh.py --dataroot ./datasets/yh --name yh_cyclegan_imgandseg --batchSize 4 --model cycle_gan --pool_size 50 --no_dropout --yh_run_model Test --dataset_mode yh --input_nc 1 --output_nc 1 --checkpoints_dir /home-local/Cycle_Deep/Checkpoints/ --test_seg_output_dir /home-local/Cycle_Deep/Output/ --which_epoch 50 54 | ``` 55 | 56 | - Test the segmentation 57 | ```bash 58 | python train_yh.py --dataroot ./datasets/yh --name yh_cyclegan_imgandseg --batchSize 4 --model test_seg --pool_size 50 --no_dropout --yh_run_model TestSeg --dataset_mode yh_test_seg --input_nc 1 --output_nc 1 --checkpoints_dir /home-local/Cycle_Deep/Checkpoints/ --test_seg_output_dir /home-local/Cycle_Deep/Output/ --which_epoch 50 59 | ``` 60 | - `--name` is the name of the experiment (use the same name as in training) 61 | `--which_epoch` specifies which training epoch to load 62 | 63 | 64 | ## Citation 65 | If you use this code for your research, please cite our papers.
66 | ``` 67 | @article{huo2017adversarial, 68 | title={Adversarial Synthesis Learning Enables Segmentation Without Target Modality Ground Truth}, 69 | author={Huo, Yuankai and Xu, Zhoubing and Bao, Shunxing and Assad, Albert and Abramson, Richard G and Landman, Bennett A}, 70 | journal={arXiv preprint arXiv:1712.07695}, 71 | year={2017} 72 | } 73 | ``` 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /torchsrc/datasets/apc/mit_benchmark.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import os 3 | import os.path as osp 4 | 5 | import numpy as np 6 | import scipy.misc 7 | from sklearn.model_selection import train_test_split 8 | 9 | from base import APC2016Base 10 | 11 | 12 | def ids_from_scene_dir(scene_dir, empty_scene_dir): 13 | for i_frame in itertools.count(): 14 | empty_file = osp.join( 15 | empty_scene_dir, 'frame-{:06}.color.png'.format(i_frame)) 16 | rgb_file = osp.join( 17 | scene_dir, 'frame-{:06}.color.png'.format(i_frame)) 18 | segm_file = osp.join( 19 | scene_dir, 'segm/frame-{:06}.segm.png'.format(i_frame)) 20 | if not (osp.exists(rgb_file) and osp.exists(segm_file)): 21 | break 22 | data_id = (empty_file, rgb_file, segm_file) 23 | yield data_id 24 | 25 | 26 | def bin_id_from_scene_dir(scene_dir): 27 | caminfo = open(osp.join(scene_dir, 'cam.info.txt')).read() 28 | loc = caminfo.splitlines()[0].split(': ')[-1] 29 | if loc == 'shelf': 30 | bin_id = caminfo.splitlines()[1][-1] 31 | else: 32 | bin_id = 'tote' 33 | return bin_id 34 | 35 | 36 | class APC2016mit_benchmark(APC2016Base): 37 | 38 | def __init__(self, split='train', transform=False): 39 | assert split in ['train', 'valid', 'all'] 40 | self.split = split 41 | self._transform = transform 42 | self.dataset_dir = osp.expanduser('~/data/datasets/APC2016/benchmark') 43 | data_ids = self._get_ids() 44 | ids_train, ids_valid = train_test_split( 45 | data_ids, test_size=0.25, random_state=1234) 46 | self._ids = {'train': ids_train, 'valid': ids_valid, 'all': data_ids} 47 | 48 | def __len__(self): 49 | return len(self._ids[self.split]) 50 | 51 | def _get_ids_from_loc_dir(self, env, loc_dir): 52 | assert env in ('office', 'warehouse') 53 | loc = osp.basename(loc_dir) 54 | data_ids = [] 55 | for scene_dir in os.listdir(loc_dir): 56 | scene_dir = osp.join(loc_dir, scene_dir) 57 | bin_id = bin_id_from_scene_dir(scene_dir) 58 | empty_dir = osp.join( 59 | self.dataset_dir, env, 'empty', loc, 'scene-{}'.format(bin_id)) 60 | data_ids += list(ids_from_scene_dir(scene_dir, empty_dir)) 61 | return data_ids 62 | 63 | def _get_ids(self): 64 | data_ids = [] 65 | # office 66 | contain_dir = osp.join(self.dataset_dir, 'office/test') 67 | for loc in ['shelf', 'tote']: 68 | loc_dir = osp.join(contain_dir, loc) 69 | data_ids += self._get_ids_from_loc_dir('office', loc_dir) 70 | # warehouse 71 | contain_dir = osp.join(self.dataset_dir, 'warehouse') 72 | for sub in ['practice', 'competition']: 73 | sub_contain_dir = osp.join(contain_dir, sub) 74 | for loc in ['shelf', 'tote']: 75 | loc_dir = osp.join(sub_contain_dir, loc) 76 | data_ids += self._get_ids_from_loc_dir('warehouse', loc_dir) 77 | return data_ids 78 | 79 | def _load_from_id(self, data_id): 80 | empty_file, rgb_file, segm_file = data_id 81 | img = scipy.misc.imread(rgb_file, mode='RGB') 82 | img_empty = scipy.misc.imread(empty_file, mode='RGB') 83 | # Label value is multiplied by 9: 84 | # ex) 0: 0/6=0 (background), 54: 54/6=9 (dasani_bottle_water) 85 | lbl = 
scipy.misc.imread(segm_file, mode='L') / 6 86 | lbl = lbl.astype(np.int32) 87 | img_empty[lbl > 0] = img[lbl > 0] 88 | return img_empty, lbl 89 | 90 | def __getitem__(self, index): 91 | data_id = self._ids[self.split][index] 92 | img, lbl = self._load_from_id(data_id) 93 | if self._transform: 94 | return self.transform(img, lbl) 95 | else: 96 | return img, lbl 97 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split): 15 | n = caffe.NetSpec() 16 | n.data, n.label = L.Python(module='pascalcontext_layers', 17 | layer='PASCALContextSegDataLayer', ntop=2, 18 | param_str=str(dict(voc_dir='../../data/pascal', 19 | context_dir='../../data/pascal-context', split=split, 20 | seed=1337))) 21 | 22 | # the base net 23 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 24 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 25 | n.pool1 = max_pool(n.relu1_2) 26 | 27 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 28 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 29 | n.pool2 = max_pool(n.relu2_2) 30 | 31 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 32 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 33 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 34 | n.pool3 = max_pool(n.relu3_3) 35 | 36 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 37 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 38 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 39 | n.pool4 = max_pool(n.relu4_3) 40 | 41 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 42 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 43 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 44 | n.pool5 = max_pool(n.relu5_3) 45 | 46 | # fully conv 47 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 48 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 49 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 50 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 51 | 52 | n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0, 53 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 54 | n.upscore2 = L.Deconvolution(n.score_fr, 55 | convolution_param=dict(num_output=60, kernel_size=4, stride=2, 56 | bias_term=False), 57 | param=[dict(lr_mult=0)]) 58 | 59 | n.score_pool4 = L.Convolution(n.pool4, num_output=60, kernel_size=1, pad=0, 60 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 61 | n.score_pool4c = crop(n.score_pool4, n.upscore2) 62 | n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c, 63 | operation=P.Eltwise.SUM) 64 | n.upscore_pool4 = L.Deconvolution(n.fuse_pool4, 65 | convolution_param=dict(num_output=60, kernel_size=4, stride=2, 66 | bias_term=False), 67 | param=[dict(lr_mult=0)]) 68 | 69 | n.score_pool3 = L.Convolution(n.pool3, num_output=60, kernel_size=1, pad=0, 70 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 71 | 
n.score_pool3c = crop(n.score_pool3, n.upscore_pool4) 72 | n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c, 73 | operation=P.Eltwise.SUM) 74 | n.upscore8 = L.Deconvolution(n.fuse_pool3, 75 | convolution_param=dict(num_output=60, kernel_size=16, stride=8, 76 | bias_term=False), 77 | param=[dict(lr_mult=0)]) 78 | 79 | n.score = crop(n.upscore8, n.data) 80 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 81 | loss_param=dict(normalize=False, ignore_label=255)) 82 | 83 | return n.to_proto() 84 | 85 | def make_net(): 86 | with open('train.prototxt', 'w') as f: 87 | f.write(str(fcn('train'))) 88 | 89 | with open('val.prototxt', 'w') as f: 90 | f.write(str(fcn('val'))) 91 | 92 | if __name__ == '__main__': 93 | make_net() 94 | -------------------------------------------------------------------------------- /util/get_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import tarfile 4 | import requests 5 | from warnings import warn 6 | from zipfile import ZipFile 7 | from bs4 import BeautifulSoup 8 | from os.path import abspath, isdir, join, basename 9 | 10 | 11 | class GetData(object): 12 | """ 13 | 14 | Download CycleGAN or Pix2Pix Data. 15 | 16 | Args: 17 | technique : str 18 | One of: 'cyclegan' or 'pix2pix'. 19 | verbose : bool 20 | If True, print additional information. 21 | 22 | Examples: 23 | >>> from util.get_data import GetData 24 | >>> gd = GetData(technique='cyclegan') 25 | >>> new_data_path = gd.get(save_path='./datasets') # options will be displayed. 26 | 27 | """ 28 | 29 | def __init__(self, technique='cyclegan', verbose=True): 30 | url_dict = { 31 | 'pix2pix': 'https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets', 32 | 'cyclegan': 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets' 33 | } 34 | self.url = url_dict.get(technique.lower()) 35 | self._verbose = verbose 36 | 37 | def _print(self, text): 38 | if self._verbose: 39 | print(text) 40 | 41 | @staticmethod 42 | def _get_options(r): 43 | soup = BeautifulSoup(r.text, 'lxml') 44 | options = [h.text for h in soup.find_all('a', href=True) 45 | if h.text.endswith(('.zip', 'tar.gz'))] 46 | return options 47 | 48 | def _present_options(self): 49 | r = requests.get(self.url) 50 | options = self._get_options(r) 51 | print('Options:\n') 52 | for i, o in enumerate(options): 53 | print("{0}: {1}".format(i, o)) 54 | choice = input("\nPlease enter the number of the " 55 | "dataset above you wish to download:") 56 | return options[int(choice)] 57 | 58 | def _download_data(self, dataset_url, save_path): 59 | if not isdir(save_path): 60 | os.makedirs(save_path) 61 | 62 | base = basename(dataset_url) 63 | temp_save_path = join(save_path, base) 64 | 65 | with open(temp_save_path, "wb") as f: 66 | r = requests.get(dataset_url) 67 | f.write(r.content) 68 | 69 | if base.endswith('.tar.gz'): 70 | obj = tarfile.open(temp_save_path) 71 | elif base.endswith('.zip'): 72 | obj = ZipFile(temp_save_path, 'r') 73 | else: 74 | raise ValueError("Unknown File Type: {0}.".format(base)) 75 | 76 | self._print("Unpacking Data...") 77 | obj.extractall(save_path) 78 | obj.close() 79 | os.remove(temp_save_path) 80 | 81 | def get(self, save_path, dataset=None): 82 | """ 83 | 84 | Download a dataset. 85 | 86 | Args: 87 | save_path : str 88 | A directory to save the data to. 89 | dataset : str, optional 90 | A specific dataset to download. 91 | Note: this must include the file extension. 
92 | If None, options will be presented for you 93 | to choose from. 94 | 95 | Returns: 96 | save_path_full : str 97 | The absolute path to the downloaded data. 98 | 99 | """ 100 | if dataset is None: 101 | selected_dataset = self._present_options() 102 | else: 103 | selected_dataset = dataset 104 | 105 | save_path_full = join(save_path, selected_dataset.split('.')[0]) 106 | 107 | if isdir(save_path_full): 108 | warn("\n'{0}' already exists. Voiding Download.".format( 109 | save_path_full)) 110 | else: 111 | self._print('Downloading Data...') 112 | url = "{0}/{1}".format(self.url, selected_dataset) 113 | self._download_data(url, save_path=save_path) 114 | 115 | return abspath(save_path_full) 116 | -------------------------------------------------------------------------------- /data/yh_test_seg_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | from data.base_dataset import BaseDataset, get_transform 4 | from PIL import Image 5 | import torch 6 | import random 7 | import random_crop_yh 8 | 9 | class yhTestSegDataset(BaseDataset): 10 | def initialize(self, opt): 11 | self.opt = opt 12 | self.root = opt.dataroot 13 | 14 | self.dir_A = opt.test_CT_dir 15 | # self.dir_Seg = opt.test_CT_seg_dir 16 | 17 | self.A_paths = opt.imglist_testCT 18 | 19 | self.A_size = len(self.A_paths) 20 | 21 | if not self.opt.isTrain: 22 | self.skipcrop = True 23 | else: 24 | self.skipcrop = False 25 | # self.transform = get_transform(opt) 26 | 27 | if self.skipcrop: 28 | osize = [opt.fineSize, opt.fineSize] 29 | else: 30 | osize = [opt.loadSize, opt.loadSize] 31 | transform_list = [] 32 | transform_list.append(transforms.Scale(osize, Image.BICUBIC)) 33 | self.transforms_scale = transforms.Compose(transform_list) 34 | 35 | transform_list = [] 36 | transform_list.append(transforms.Scale(osize, Image.NEAREST)) 37 | self.transforms_seg_scale = transforms.Compose(transform_list) 38 | 39 | transform_list = [] 40 | transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize)) 41 | self.transforms_crop = transforms.Compose(transform_list) 42 | 43 | transform_list = [] 44 | transform_list.append(transforms.ToTensor()) 45 | self.transforms_toTensor = transforms.Compose(transform_list) 46 | 47 | transform_list = [] 48 | transform_list.append(transforms.Normalize((0.5, 0.5, 0.5), 49 | (0.5, 0.5, 0.5))) 50 | self.transforms_normalize = transforms.Compose(transform_list) 51 | 52 | 53 | def __getitem__(self, index): 54 | A_path = self.A_paths[index % self.A_size] 55 | # Seg_path = A_path.replace(self.dir_A,self.dir_Seg) 56 | # Seg_path = Seg_path.replace('_rawimg','_organlabel') 57 | 58 | A_img = Image.open(A_path).convert('L') 59 | # Seg_img = Image.open(Seg_path).convert('I') 60 | 61 | A_img = self.transforms_scale(A_img) 62 | # Seg_img = self.transforms_seg_scale(Seg_img) 63 | 64 | A_img = self.transforms_toTensor(A_img) 65 | # Seg_img = self.transforms_toTensor(Seg_img) 66 | 67 | A_img = self.transforms_normalize(A_img) 68 | 69 | #strategy 1 70 | # Seg_img[Seg_img == 6] = 4 71 | # Seg_img[Seg_img == 7] = 5 72 | # Seg_img[Seg_img == 14] = 6 73 | # 74 | # Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize) 75 | # Seg_imgs[0, :, :] = Seg_img == 0 76 | # Seg_imgs[1, :, :] = Seg_img == 1 77 | # Seg_imgs[2, :, :] = Seg_img == 2 78 | # Seg_imgs[3, :, :] = Seg_img == 3 79 | # Seg_imgs[4, :, :] = Seg_img == 4 80 | # Seg_imgs[5, :, :] = Seg_img == 5 81 | # Seg_imgs[6, :, :] = Seg_img == 
6 82 | 83 | #strategy 2 84 | # Seg_img[Seg_img == 2] = 3 85 | # Seg_img[Seg_img == 14] = 3 86 | # Seg_img[Seg_img == 3] = 3 87 | # Seg_img[Seg_img == 4] = 3 88 | # Seg_img[Seg_img == 5] = 3 89 | # Seg_img[Seg_img == 7] = 3 90 | # Seg_img[Seg_img == 6] = 2 91 | # 92 | # Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize) 93 | # Seg_imgs[0, :, :] = Seg_img == 0 94 | # Seg_imgs[1, :, :] = Seg_img == 1 95 | # Seg_imgs[2, :, :] = Seg_img == 2 96 | # Seg_imgs[3, :, :] = Seg_img == 3 97 | Seg_imgs = 0 98 | Seg_path = '' 99 | 100 | return {'A': A_img, 'Seg': Seg_imgs, 101 | 'A_paths': A_path, 'Seg_paths':Seg_path} 102 | 103 | def __len__(self): 104 | return self.A_size 105 | 106 | def name(self): 107 | return 'TestCTDataset' 108 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/nyud/train.txt: -------------------------------------------------------------------------------- 1 | 5449 2 | 6140 3 | 5902 4 | 5543 5 | 6392 6 | 5425 7 | 5121 8 | 5506 9 | 5696 10 | 6239 11 | 6143 12 | 5485 13 | 5990 14 | 5322 15 | 6138 16 | 5986 17 | 5756 18 | 5323 19 | 5158 20 | 5921 21 | 5855 22 | 5478 23 | 5898 24 | 5415 25 | 6054 26 | 5161 27 | 5318 28 | 5218 29 | 5460 30 | 6056 31 | 6313 32 | 5595 33 | 5256 34 | 5353 35 | 5044 36 | 5177 37 | 6029 38 | 5980 39 | 5493 40 | 5528 41 | 5904 42 | 5895 43 | 5881 44 | 5275 45 | 5829 46 | 5426 47 | 6334 48 | 5548 49 | 5988 50 | 5714 51 | 5254 52 | 5309 53 | 5253 54 | 5255 55 | 5983 56 | 5752 57 | 5005 58 | 6240 59 | 5546 60 | 5695 61 | 5684 62 | 5751 63 | 6274 64 | 5882 65 | 5730 66 | 5495 67 | 5489 68 | 5749 69 | 6244 70 | 5599 71 | 5503 72 | 5319 73 | 5418 74 | 5454 75 | 5937 76 | 5416 77 | 5989 78 | 5505 79 | 6352 80 | 6237 81 | 6139 82 | 5901 83 | 5421 84 | 5498 85 | 5602 86 | 5083 87 | 5944 88 | 5456 89 | 6122 90 | 6333 91 | 5417 92 | 5981 93 | 5165 94 | 6417 95 | 5758 96 | 5527 97 | 5082 98 | 5805 99 | 5308 100 | 5828 101 | 5120 102 | 5214 103 | 5530 104 | 6026 105 | 5452 106 | 5008 107 | 5251 108 | 6047 109 | 6238 110 | 6008 111 | 5925 112 | 5873 113 | 6366 114 | 5156 115 | 5875 116 | 6311 117 | 6224 118 | 6169 119 | 5922 120 | 5877 121 | 5615 122 | 5896 123 | 5715 124 | 5890 125 | 6141 126 | 5179 127 | 5215 128 | 5685 129 | 6246 130 | 5641 131 | 5058 132 | 5807 133 | 5122 134 | 5423 135 | 5716 136 | 5652 137 | 5262 138 | 5978 139 | 5429 140 | 5542 141 | 5598 142 | 5984 143 | 5354 144 | 5261 145 | 6044 146 | 5003 147 | 5888 148 | 5422 149 | 5124 150 | 5219 151 | 6009 152 | 6087 153 | 5892 154 | 6168 155 | 5616 156 | 5754 157 | 5547 158 | 5393 159 | 5889 160 | 5750 161 | 5963 162 | 5500 163 | 5004 164 | 5303 165 | 6269 166 | 6243 167 | 5885 168 | 5019 169 | 5757 170 | 6267 171 | 5809 172 | 5321 173 | 5529 174 | 5643 175 | 5748 176 | 5501 177 | 6137 178 | 5213 179 | 5259 180 | 5596 181 | 5745 182 | 5653 183 | 6418 184 | 5507 185 | 5136 186 | 5453 187 | 6367 188 | 5544 189 | 6046 190 | 6271 191 | 5252 192 | 5488 193 | 5480 194 | 5080 195 | 5504 196 | 5274 197 | 5578 198 | 5920 199 | 5654 200 | 5924 201 | 5260 202 | 5394 203 | 6041 204 | 5263 205 | 6223 206 | 5642 207 | 6121 208 | 5497 209 | 5939 210 | 5491 211 | 5825 212 | 5753 213 | 5320 214 | 5487 215 | 6042 216 | 6270 217 | 5940 218 | 5157 219 | 5479 220 | 5496 221 | 5639 222 | 5392 223 | 6177 224 | 5614 225 | 5451 226 | 6312 227 | 6199 228 | 5667 229 | 5666 230 | 6198 231 | 5006 232 | 5427 233 | 5887 234 | 5755 235 | 6200 236 | 5461 237 | 6120 238 | 5982 239 | 6416 240 | 5277 241 | 5884 242 | 6142 243 | 6268 
244 | 5880 245 | 6266 246 | 5166 247 | 5258 248 | 5420 249 | 5490 250 | 5135 251 | 5655 252 | 5391 253 | 5682 254 | 5853 255 | 5905 256 | 6045 257 | 5576 258 | 5827 259 | 5492 260 | 5943 261 | 5574 262 | 5307 263 | 5428 264 | 5874 265 | 6006 266 | 5458 267 | 5883 268 | 6030 269 | 5808 270 | 5964 271 | 5305 272 | 5159 273 | 5540 274 | 6178 275 | 6024 276 | 5484 277 | 5832 278 | 6031 279 | 5459 280 | 6028 281 | 5729 282 | 5601 283 | 6415 284 | 5483 285 | 5324 286 | 5894 287 | 5830 288 | 6025 289 | 5854 290 | 5164 291 | 6350 292 | 5903 293 | 6296 294 | 5600 295 | 5486 296 | 5007 297 | 6055 298 | 5747 299 | 5872 300 | 5856 301 | 5482 302 | 5424 303 | 5987 304 | 6222 305 | 5597 306 | 5876 307 | 5824 308 | 5178 309 | 6085 310 | 5979 311 | 6197 312 | 5985 313 | 5572 314 | 5899 315 | 5020 316 | 6241 317 | 5276 318 | 5938 319 | 5806 320 | 6272 321 | 6043 322 | 5502 323 | 5893 324 | 6105 325 | 5160 326 | 5886 327 | 6007 328 | 5923 329 | 5942 330 | 5665 331 | 6225 332 | 5577 333 | 5257 334 | 6273 335 | 5481 336 | 5162 337 | 5217 338 | 5457 339 | 6245 340 | 5879 341 | 6005 342 | 6309 343 | 5575 344 | 5494 345 | 5900 346 | 5216 347 | 5304 348 | 5499 349 | 5746 350 | 5545 351 | 5045 352 | 6236 353 | 5278 354 | 6242 355 | 5123 356 | 5450 357 | 5306 358 | 5419 359 | 5897 360 | 5831 361 | 6086 362 | 5891 363 | 5455 364 | 6351 365 | 5878 366 | 5826 367 | 5081 368 | 6420 369 | 6393 370 | 6040 371 | 5573 372 | 6310 373 | 5640 374 | 5936 375 | 5541 376 | 6221 377 | 5163 378 | 6027 379 | 5941 380 | 5683 381 | 6419 -------------------------------------------------------------------------------- /data/yh_seg_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | from data.base_dataset import BaseDataset, get_transform 4 | from PIL import Image 5 | import torch 6 | import random 7 | import random_crop_yh 8 | 9 | class yhSegDataset(BaseDataset): 10 | def initialize(self, opt): 11 | self.opt = opt 12 | self.root = opt.dataroot 13 | self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A') 14 | self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B') 15 | 16 | self.dir_A = opt.raw_MRI_dir 17 | self.dir_B = opt.raw_CT_dir 18 | self.dir_Seg = opt.raw_MRI_seg_dir 19 | 20 | self.A_paths = opt.imglist_MRI 21 | self.B_paths = opt.imglist_CT 22 | 23 | self.A_size = len(self.A_paths) 24 | self.B_size = len(self.B_paths) 25 | if not self.opt.isTrain: 26 | self.skipcrop = True 27 | else: 28 | self.skipcrop = False 29 | # self.transform = get_transform(opt) 30 | 31 | if self.skipcrop: 32 | osize = [opt.fineSize, opt.fineSize] 33 | else: 34 | osize = [opt.loadSize, opt.loadSize] 35 | transform_list = [] 36 | transform_list.append(transforms.Scale(osize, Image.BICUBIC)) 37 | self.transforms_scale = transforms.Compose(transform_list) 38 | 39 | transform_list = [] 40 | transform_list.append(transforms.Scale(osize, Image.NEAREST)) 41 | self.transforms_seg_scale = transforms.Compose(transform_list) 42 | 43 | transform_list = [] 44 | transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize)) 45 | self.transforms_crop = transforms.Compose(transform_list) 46 | 47 | transform_list = [] 48 | transform_list.append(transforms.ToTensor()) 49 | self.transforms_toTensor = transforms.Compose(transform_list) 50 | 51 | transform_list = [] 52 | transform_list.append(transforms.Normalize((0.5, 0.5, 0.5), 53 | (0.5, 0.5, 0.5))) 54 | self.transforms_normalize = transforms.Compose(transform_list) 55 | 56 | 57 | def __getitem__(self, 
index): 58 | index_A = index % self.A_size 59 | A_path = self.A_paths[index_A] 60 | Seg_path = A_path.replace(self.dir_A,self.dir_Seg) 61 | Seg_path = Seg_path.replace('_rawimg','_organlabel') 62 | 63 | index_B = random.randint(0, self.B_size - 1) 64 | B_path = self.B_paths[index_B] 65 | A_img = Image.open(A_path).convert('L') 66 | Seg_img = Image.open(Seg_path).convert('I') 67 | B_img = Image.open(B_path).convert('L') 68 | 69 | A_img = self.transforms_scale(A_img) 70 | B_img = self.transforms_scale(B_img) 71 | Seg_img = self.transforms_seg_scale(Seg_img) 72 | 73 | if not self.skipcrop: 74 | [A_img,Seg_img] = self.transforms_crop([A_img, Seg_img]) 75 | [B_img] = self.transforms_crop([B_img]) 76 | 77 | A_img = self.transforms_toTensor(A_img) 78 | B_img = self.transforms_toTensor(B_img) 79 | Seg_img = self.transforms_toTensor(Seg_img) 80 | 81 | A_img = self.transforms_normalize(A_img) 82 | B_img = self.transforms_normalize(B_img) 83 | 84 | Seg_img[Seg_img == 6] = 4 85 | Seg_img[Seg_img == 7] = 5 86 | Seg_img[Seg_img == 14] = 6 87 | 88 | Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize) 89 | Seg_imgs[0, :, :] = Seg_img == 0 90 | Seg_imgs[1, :, :] = Seg_img == 1 91 | Seg_imgs[2, :, :] = Seg_img == 2 92 | Seg_imgs[3, :, :] = Seg_img == 3 93 | Seg_imgs[4, :, :] = Seg_img == 4 94 | Seg_imgs[5, :, :] = Seg_img == 5 95 | Seg_imgs[6, :, :] = Seg_img == 6 96 | 97 | return {'A': A_img, 'B': B_img, 'Seg': Seg_imgs, 'Seg_one': Seg_img, 98 | 'A_paths': A_path, 'B_paths': B_path, 'Seg_paths':Seg_path} 99 | 100 | 101 | def __len__(self): 102 | return max(self.A_size, self.B_size) 103 | 104 | def name(self): 105 | return 'UnalignedDataset' 106 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split): 15 | n = caffe.NetSpec() 16 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), 17 | seed=1337) 18 | if split == 'train': 19 | pydata_params['sbdd_dir'] = '../data/sbdd/dataset' 20 | pylayer = 'SBDDSegDataLayer' 21 | else: 22 | pydata_params['voc_dir'] = '../data/pascal/VOC2011' 23 | pylayer = 'VOCSegDataLayer' 24 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer, 25 | ntop=2, param_str=str(pydata_params)) 26 | 27 | # the base net 28 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 29 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 30 | n.pool1 = max_pool(n.relu1_2) 31 | 32 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 33 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 34 | n.pool2 = max_pool(n.relu2_2) 35 | 36 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 37 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 38 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 39 | n.pool3 = max_pool(n.relu3_3) 40 | 41 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 42 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 43 | n.conv4_3, n.relu4_3 = 
conv_relu(n.relu4_2, 512) 44 | n.pool4 = max_pool(n.relu4_3) 45 | 46 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 47 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 48 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 49 | n.pool5 = max_pool(n.relu5_3) 50 | 51 | # fully conv 52 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 53 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 54 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 55 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 56 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0, 57 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 58 | n.upscore2 = L.Deconvolution(n.score_fr, 59 | convolution_param=dict(num_output=21, kernel_size=4, stride=2, 60 | bias_term=False), 61 | param=[dict(lr_mult=0)]) 62 | 63 | n.score_pool4 = L.Convolution(n.pool4, num_output=21, kernel_size=1, pad=0, 64 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 65 | n.score_pool4c = crop(n.score_pool4, n.upscore2) 66 | n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c, 67 | operation=P.Eltwise.SUM) 68 | n.upscore_pool4 = L.Deconvolution(n.fuse_pool4, 69 | convolution_param=dict(num_output=21, kernel_size=4, stride=2, 70 | bias_term=False), 71 | param=[dict(lr_mult=0)]) 72 | 73 | n.score_pool3 = L.Convolution(n.pool3, num_output=21, kernel_size=1, pad=0, 74 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 75 | n.score_pool3c = crop(n.score_pool3, n.upscore_pool4) 76 | n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c, 77 | operation=P.Eltwise.SUM) 78 | n.upscore8 = L.Deconvolution(n.fuse_pool3, 79 | convolution_param=dict(num_output=21, kernel_size=16, stride=8, 80 | bias_term=False), 81 | param=[dict(lr_mult=0)]) 82 | 83 | n.score = crop(n.upscore8, n.data) 84 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 85 | loss_param=dict(normalize=False, ignore_label=255)) 86 | 87 | return n.to_proto() 88 | 89 | def make_net(): 90 | with open('train.prototxt', 'w') as f: 91 | f.write(str(fcn('train'))) 92 | 93 | with open('val.prototxt', 'w') as f: 94 | f.write(str(fcn('seg11valid'))) 95 | 96 | if __name__ == '__main__': 97 | make_net() 98 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def modality_fcn(net_spec, data, modality): 15 | n = net_spec 16 | # the base net 17 | n['conv1_1' + modality], n['relu1_1' + modality] = conv_relu(n[data], 64, 18 | pad=100) 19 | n['conv1_2' + modality], n['relu1_2' + modality] = conv_relu(n['relu1_1' + 20 | modality], 64) 21 | n['pool1' + modality] = max_pool(n['relu1_2' + modality]) 22 | 23 | n['conv2_1' + modality], n['relu2_1' + modality] = conv_relu(n['pool1' + 24 | modality], 128) 25 | n['conv2_2' + modality], n['relu2_2' + modality] = conv_relu(n['relu2_1' + 26 | modality], 128) 27 | n['pool2' + 
modality] = max_pool(n['relu2_2' + modality]) 28 | 29 | n['conv3_1' + modality], n['relu3_1' + modality] = conv_relu(n['pool2' + 30 | modality], 256) 31 | n['conv3_2' + modality], n['relu3_2' + modality] = conv_relu(n['relu3_1' + 32 | modality], 256) 33 | n['conv3_3' + modality], n['relu3_3' + modality] = conv_relu(n['relu3_2' + 34 | modality], 256) 35 | n['pool3' + modality] = max_pool(n['relu3_3' + modality]) 36 | 37 | n['conv4_1' + modality], n['relu4_1' + modality] = conv_relu(n['pool3' + 38 | modality], 512) 39 | n['conv4_2' + modality], n['relu4_2' + modality] = conv_relu(n['relu4_1' + 40 | modality], 512) 41 | n['conv4_3' + modality], n['relu4_3' + modality] = conv_relu(n['relu4_2' + 42 | modality], 512) 43 | n['pool4' + modality] = max_pool(n['relu4_3' + modality]) 44 | 45 | n['conv5_1' + modality], n['relu5_1' + modality] = conv_relu(n['pool4' + 46 | modality], 512) 47 | n['conv5_2' + modality], n['relu5_2' + modality] = conv_relu(n['relu5_1' + 48 | modality], 512) 49 | n['conv5_3' + modality], n['relu5_3' + modality] = conv_relu(n['relu5_2' + 50 | modality], 512) 51 | n['pool5' + modality] = max_pool(n['relu5_3' + modality]) 52 | 53 | # fully conv 54 | n['fc6' + modality], n['relu6' + modality] = conv_relu( 55 | n['pool5' + modality], 4096, ks=7, pad=0) 56 | n['drop6' + modality] = L.Dropout( 57 | n['relu6' + modality], dropout_ratio=0.5, in_place=True) 58 | n['fc7' + modality], n['relu7' + modality] = conv_relu( 59 | n['drop6' + modality], 4096, ks=1, pad=0) 60 | n['drop7' + modality] = L.Dropout( 61 | n['relu7' + modality], dropout_ratio=0.5, in_place=True) 62 | n['score_fr' + modality] = L.Convolution( 63 | n['drop7' + modality], num_output=40, kernel_size=1, pad=0, 64 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 65 | return n 66 | 67 | def fcn(split, tops): 68 | n = caffe.NetSpec() 69 | n.color, n.hha, n.label = L.Python(module='nyud_layers', 70 | layer='NYUDSegDataLayer', ntop=3, 71 | param_str=str(dict(nyud_dir='../data/nyud', split=split, 72 | tops=tops, seed=1337))) 73 | n = modality_fcn(n, 'color', 'color') 74 | n = modality_fcn(n, 'hha', 'hha') 75 | n.score_fused = L.Eltwise(n.score_frcolor, n.score_frhha, 76 | operation=P.Eltwise.SUM, coeff=[0.5, 0.5]) 77 | n.upscore = L.Deconvolution(n.score_fused, 78 | convolution_param=dict(num_output=40, kernel_size=64, stride=32, 79 | bias_term=False), 80 | param=[dict(lr_mult=0)]) 81 | n.score = crop(n.upscore, n.color) 82 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 83 | loss_param=dict(normalize=False, ignore_label=255)) 84 | return n.to_proto() 85 | 86 | def make_net(): 87 | tops = ['color', 'hha', 'label'] 88 | with open('trainval.prototxt', 'w') as f: 89 | f.write(str(fcn('trainval', tops))) 90 | 91 | with open('test.prototxt', 'w') as f: 92 | f.write(str(fcn('test', tops))) 93 | 94 | if __name__ == '__main__': 95 | make_net() 96 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/nyud/val.txt: -------------------------------------------------------------------------------- 1 | 5010 2 | 5011 3 | 5012 4 | 5013 5 | 5022 6 | 5023 7 | 5024 8 | 5025 9 | 5026 10 | 5027 11 | 5048 12 | 5049 13 | 5050 14 | 5051 15 | 5052 16 | 5053 17 | 5054 18 | 5055 19 | 5064 20 | 5065 21 | 5066 22 | 5067 23 | 5068 24 | 5069 25 | 5070 26 | 5071 27 | 5072 28 | 5073 29 | 5074 30 | 5075 31 | 5092 32 | 5093 33 | 5094 34 | 5095 35 | 5096 36 | 5097 37 | 5098 38 | 5099 39 | 5100 40 | 5101 41 | 5102 42 | 5103 43 | 5104 44 | 5105 45 | 5106 46 | 5107 47 | 
5108 48 | 5109 49 | 5110 50 | 5111 51 | 5112 52 | 5113 53 | 5114 54 | 5115 55 | 5116 56 | 5130 57 | 5138 58 | 5139 59 | 5140 60 | 5141 61 | 5142 62 | 5143 63 | 5144 64 | 5145 65 | 5146 66 | 5147 67 | 5148 68 | 5149 69 | 5150 70 | 5151 71 | 5152 72 | 5170 73 | 5203 74 | 5204 75 | 5205 76 | 5206 77 | 5223 78 | 5224 79 | 5225 80 | 5226 81 | 5227 82 | 5228 83 | 5229 84 | 5230 85 | 5231 86 | 5232 87 | 5233 88 | 5234 89 | 5235 90 | 5236 91 | 5237 92 | 5238 93 | 5239 94 | 5240 95 | 5241 96 | 5242 97 | 5243 98 | 5244 99 | 5245 100 | 5246 101 | 5247 102 | 5248 103 | 5249 104 | 5265 105 | 5266 106 | 5267 107 | 5268 108 | 5269 109 | 5270 110 | 5286 111 | 5287 112 | 5288 113 | 5289 114 | 5290 115 | 5291 116 | 5292 117 | 5293 118 | 5294 119 | 5295 120 | 5313 121 | 5314 122 | 5336 123 | 5337 124 | 5338 125 | 5339 126 | 5340 127 | 5341 128 | 5342 129 | 5343 130 | 5344 131 | 5345 132 | 5346 133 | 5347 134 | 5348 135 | 5349 136 | 5350 137 | 5365 138 | 5366 139 | 5367 140 | 5368 141 | 5369 142 | 5370 143 | 5371 144 | 5372 145 | 5373 146 | 5374 147 | 5375 148 | 5376 149 | 5377 150 | 5378 151 | 5379 152 | 5380 153 | 5381 154 | 5382 155 | 5383 156 | 5398 157 | 5399 158 | 5400 159 | 5401 160 | 5402 161 | 5403 162 | 5404 163 | 5405 164 | 5406 165 | 5407 166 | 5408 167 | 5409 168 | 5410 169 | 5436 170 | 5437 171 | 5438 172 | 5439 173 | 5440 174 | 5467 175 | 5468 176 | 5514 177 | 5534 178 | 5535 179 | 5536 180 | 5552 181 | 5553 182 | 5554 183 | 5584 184 | 5585 185 | 5586 186 | 5587 187 | 5588 188 | 5589 189 | 5590 190 | 5608 191 | 5609 192 | 5610 193 | 5611 194 | 5622 195 | 5623 196 | 5624 197 | 5625 198 | 5626 199 | 5627 200 | 5628 201 | 5629 202 | 5630 203 | 5631 204 | 5632 205 | 5646 206 | 5647 207 | 5648 208 | 5649 209 | 5659 210 | 5660 211 | 5661 212 | 5662 213 | 5674 214 | 5675 215 | 5691 216 | 5692 217 | 5700 218 | 5701 219 | 5702 220 | 5703 221 | 5704 222 | 5705 223 | 5719 224 | 5720 225 | 5721 226 | 5722 227 | 5723 228 | 5735 229 | 5736 230 | 5737 231 | 5738 232 | 5739 233 | 5740 234 | 5741 235 | 5742 236 | 5788 237 | 5789 238 | 5790 239 | 5791 240 | 5792 241 | 5793 242 | 5794 243 | 5795 244 | 5796 245 | 5797 246 | 5798 247 | 5799 248 | 5815 249 | 5816 250 | 5817 251 | 5818 252 | 5819 253 | 5820 254 | 5847 255 | 5848 256 | 5849 257 | 5863 258 | 5864 259 | 5865 260 | 5866 261 | 5867 262 | 5868 263 | 5909 264 | 5910 265 | 5911 266 | 5912 267 | 5913 268 | 5914 269 | 5915 270 | 5916 271 | 5929 272 | 5930 273 | 5931 274 | 5948 275 | 5949 276 | 5950 277 | 5951 278 | 5952 279 | 5953 280 | 5954 281 | 5955 282 | 5956 283 | 5957 284 | 5958 285 | 5968 286 | 5969 287 | 5996 288 | 5997 289 | 5998 290 | 5999 291 | 6000 292 | 6013 293 | 6014 294 | 6015 295 | 6016 296 | 6017 297 | 6018 298 | 6019 299 | 6020 300 | 6035 301 | 6036 302 | 6037 303 | 6050 304 | 6051 305 | 6059 306 | 6060 307 | 6061 308 | 6062 309 | 6063 310 | 6064 311 | 6065 312 | 6066 313 | 6067 314 | 6068 315 | 6069 316 | 6070 317 | 6071 318 | 6072 319 | 6073 320 | 6074 321 | 6097 322 | 6110 323 | 6111 324 | 6112 325 | 6113 326 | 6114 327 | 6115 328 | 6116 329 | 6132 330 | 6133 331 | 6134 332 | 6159 333 | 6160 334 | 6161 335 | 6172 336 | 6173 337 | 6185 338 | 6186 339 | 6187 340 | 6188 341 | 6189 342 | 6190 343 | 6191 344 | 6213 345 | 6214 346 | 6215 347 | 6231 348 | 6232 349 | 6251 350 | 6252 351 | 6253 352 | 6281 353 | 6282 354 | 6283 355 | 6284 356 | 6300 357 | 6301 358 | 6316 359 | 6317 360 | 6318 361 | 6319 362 | 6320 363 | 6321 364 | 6322 365 | 6323 366 | 6324 367 | 6325 368 | 6326 369 | 6327 370 | 6328 371 | 6341 372 | 6342 373 | 6343 374 | 6344 375 
| 6345 376 | 6346 377 | 6357 378 | 6358 379 | 6359 380 | 6360 381 | 6361 382 | 6362 383 | 6363 384 | 6370 385 | 6371 386 | 6372 387 | 6373 388 | 6374 389 | 6375 390 | 6376 391 | 6377 392 | 6378 393 | 6379 394 | 6380 395 | 6381 396 | 6382 397 | 6383 398 | 6402 399 | 6403 400 | 6404 401 | 6405 402 | 6406 403 | 6425 404 | 6426 405 | 6427 406 | 6428 407 | 6429 408 | 6434 409 | 6435 410 | 6436 411 | 6437 412 | 6438 413 | 6439 414 | 6440 415 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/test.txt: -------------------------------------------------------------------------------- 1 | coast_natu975 2 | insidecity_art947 3 | insidecity_urb781 4 | highway_bost374 5 | coast_n203085 6 | insidecity_a223049 7 | mountain_nat116 8 | street_art861 9 | mountain_land188 10 | street_par177 11 | opencountry_natu524 12 | forest_natu29 13 | highway_gre37 14 | street_bost77 15 | insidecity_art1125 16 | street_urb521 17 | highway_bost178 18 | street_art760 19 | street_urb885 20 | insidecity_art829 21 | coast_natu804 22 | mountain_sharp44 23 | coast_natu649 24 | opencountry_land691 25 | insidecity_hous35 26 | tallbuilding_art1719 27 | mountain_n736026 28 | mountain_moun41 29 | insidecity_urban992 30 | opencountry_land295 31 | tallbuilding_art527 32 | highway_art238 33 | forest_for114 34 | coast_land296 35 | tallbuilding_sky7 36 | mountain_n44009 37 | tallbuilding_art1316 38 | forest_nat717 39 | highway_bost164 40 | street_par29 41 | forest_natc52 42 | tallbuilding_art1004 43 | coast_sun14 44 | opencountry_land206 45 | opencountry_land364 46 | mountain_n219015 47 | highway_a836030 48 | forest_nat324 49 | opencountry_land493 50 | insidecity_art1598 51 | street_street27 52 | insidecity_a48009 53 | coast_cdmc889 54 | street_gre295 55 | tallbuilding_a538076 56 | street_boston378 57 | highway_urb759 58 | street_par151 59 | tallbuilding_urban1003 60 | tallbuilding_urban16 61 | highway_bost151 62 | opencountry_nat965 63 | highway_gre661 64 | forest_for42 65 | opencountry_n18002 66 | insidecity_art646 67 | highway_gre55 68 | coast_n295051 69 | forest_bost103 70 | highway_n480036 71 | mountain_land4 72 | forest_nat130 73 | coast_nat643 74 | insidecity_urb250 75 | street_gre11 76 | street_boston271 77 | opencountry_n490003 78 | mountain_nat762 79 | street_par86 80 | coast_arnat59 81 | mountain_land787 82 | highway_gre472 83 | opencountry_tell67 84 | mountain_sharp66 85 | opencountry_land534 86 | insidecity_gre290 87 | highway_bost307 88 | opencountry_n213059 89 | forest_nat220 90 | forest_cdmc348 91 | tallbuilding_art900 92 | insidecity_art569 93 | street_urb200 94 | coast_natu468 95 | coast_n672069 96 | insidecity_hous109 97 | forest_land862 98 | opencountry_natu65 99 | tallbuilding_a805096 100 | opencountry_n291058 101 | forest_natu439 102 | coast_nat799 103 | tallbuilding_urban991 104 | tallbuilding_sky17 105 | opencountry_land638 106 | opencountry_natu563 107 | tallbuilding_urb733 108 | forest_cdmc451 109 | mountain_n371066 110 | mountain_n213081 111 | mountain_nat57 112 | tallbuilding_a463068 113 | forest_natu848 114 | tallbuilding_art306 115 | insidecity_boston92 116 | insidecity_urb584 117 | tallbuilding_urban1126 118 | coast_n286045 119 | street_gre179 120 | coast_nat1091 121 | opencountry_nat615 122 | coast_nat901 123 | forest_cdmc291 124 | mountain_natu568 125 | mountain_n18070 126 | street_bost136 127 | tallbuilding_art425 128 | coast_bea3 129 | tallbuilding_art1616 130 | insidecity_art690 131 | highway_gre492 132 | 
highway_bost320 133 | forest_nat400 134 | highway_par23 135 | tallbuilding_a212033 136 | forest_natu994 137 | tallbuilding_archi296 138 | highway_gre413 139 | tallbuilding_a279033 140 | insidecity_art1277 141 | coast_cdmc948 142 | forest_for15 143 | street_par68 144 | mountain_natu786 145 | opencountry_open61 146 | opencountry_nat423 147 | mountain_land143 148 | tallbuilding_a487066 149 | tallbuilding_art1751 150 | insidecity_hous79 151 | street_par118 152 | highway_bost293 153 | mountain_n213021 154 | opencountry_nat802 155 | coast_n384099 156 | opencountry_natu998 157 | mountain_n344042 158 | coast_nat1265 159 | forest_text44 160 | forest_for84 161 | insidecity_a807066 162 | opencountry_nat1117 163 | coast_sun42 164 | insidecity_par180 165 | opencountry_land923 166 | highway_art580 167 | street_art1328 168 | coast_cdmc838 169 | opencountry_land660 170 | opencountry_cdmc354 171 | coast_natu825 172 | opencountry_natu38 173 | mountain_nat30 174 | coast_n199066 175 | forest_text124 176 | forest_land222 177 | tallbuilding_city56 178 | tallbuilding_city22 179 | opencountry_fie36 180 | mountain_ski24 181 | coast_cdmc997 182 | insidecity_boston232 183 | opencountry_land575 184 | opencountry_land797 185 | insidecity_urb362 186 | forest_nat1033 187 | mountain_nat891 188 | street_hexp3 189 | tallbuilding_art1474 190 | tallbuilding_urban73 191 | opencountry_natu852 192 | mountain_nat1008 193 | coast_nat294 194 | mountain_sharp20 195 | opencountry_fie14 196 | mountain_land275 197 | forest_land760 198 | coast_land374 199 | mountain_nat426 200 | highway_gre141 -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split): 15 | n = caffe.NetSpec() 16 | pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), 17 | seed=1337) 18 | if split == 'train': 19 | pydata_params['sbdd_dir'] = '../data/sbdd/dataset' 20 | pylayer = 'SBDDSegDataLayer' 21 | else: 22 | pydata_params['voc_dir'] = '../data/pascal/VOC2011' 23 | pylayer = 'VOCSegDataLayer' 24 | n.data, n.label = L.Python(module='voc_layers', layer=pylayer, 25 | ntop=2, param_str=str(pydata_params)) 26 | 27 | # the base net 28 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 29 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 30 | n.pool1 = max_pool(n.relu1_2) 31 | 32 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 33 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 34 | n.pool2 = max_pool(n.relu2_2) 35 | 36 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 37 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 38 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 39 | n.pool3 = max_pool(n.relu3_3) 40 | 41 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 42 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 43 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 44 | n.pool4 = max_pool(n.relu4_3) 45 | 46 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 47 | 
n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 48 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 49 | n.pool5 = max_pool(n.relu5_3) 50 | 51 | # fully conv 52 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 53 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 54 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 55 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 56 | 57 | n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0, 58 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 59 | n.upscore2 = L.Deconvolution(n.score_fr, 60 | convolution_param=dict(num_output=21, kernel_size=4, stride=2, 61 | bias_term=False), 62 | param=[dict(lr_mult=0)]) 63 | 64 | # scale pool4 skip for compatibility 65 | n.scale_pool4 = L.Scale(n.pool4, filler=dict(type='constant', 66 | value=0.01), param=[dict(lr_mult=0)]) 67 | n.score_pool4 = L.Convolution(n.scale_pool4, num_output=21, kernel_size=1, pad=0, 68 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 69 | n.score_pool4c = crop(n.score_pool4, n.upscore2) 70 | n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c, 71 | operation=P.Eltwise.SUM) 72 | n.upscore_pool4 = L.Deconvolution(n.fuse_pool4, 73 | convolution_param=dict(num_output=21, kernel_size=4, stride=2, 74 | bias_term=False), 75 | param=[dict(lr_mult=0)]) 76 | 77 | # scale pool3 skip for compatibility 78 | n.scale_pool3 = L.Scale(n.pool3, filler=dict(type='constant', 79 | value=0.0001), param=[dict(lr_mult=0)]) 80 | n.score_pool3 = L.Convolution(n.scale_pool3, num_output=21, kernel_size=1, pad=0, 81 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 82 | n.score_pool3c = crop(n.score_pool3, n.upscore_pool4) 83 | n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c, 84 | operation=P.Eltwise.SUM) 85 | n.upscore8 = L.Deconvolution(n.fuse_pool3, 86 | convolution_param=dict(num_output=21, kernel_size=16, stride=8, 87 | bias_term=False), 88 | param=[dict(lr_mult=0)]) 89 | 90 | n.score = crop(n.upscore8, n.data) 91 | n.loss = L.SoftmaxWithLoss(n.score, n.label, 92 | loss_param=dict(normalize=False, ignore_label=255)) 93 | 94 | return n.to_proto() 95 | 96 | def make_net(): 97 | with open('train.prototxt', 'w') as f: 98 | f.write(str(fcn('train'))) 99 | 100 | with open('val.prototxt', 'w') as f: 101 | f.write(str(fcn('seg11valid'))) 102 | 103 | if __name__ == '__main__': 104 | make_net() 105 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/net.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | from caffe import layers as L, params as P 3 | from caffe.coord_map import crop 4 | 5 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1): 6 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 7 | num_output=nout, pad=pad, 8 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 9 | return conv, L.ReLU(conv, in_place=True) 10 | 11 | def max_pool(bottom, ks=2, stride=2): 12 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 13 | 14 | def fcn(split): 15 | n = caffe.NetSpec() 16 | n.data, n.sem, n.geo = L.Python(module='siftflow_layers', 17 | layer='SIFTFlowSegDataLayer', ntop=3, 18 | param_str=str(dict(siftflow_dir='../data/sift-flow', 19 | split=split, seed=1337))) 20 | 21 | # the base net 22 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) 23 | n.conv1_2, n.relu1_2 = 
conv_relu(n.relu1_1, 64) 24 | n.pool1 = max_pool(n.relu1_2) 25 | 26 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 27 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 28 | n.pool2 = max_pool(n.relu2_2) 29 | 30 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 31 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 32 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 33 | n.pool3 = max_pool(n.relu3_3) 34 | 35 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 36 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 37 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 38 | n.pool4 = max_pool(n.relu4_3) 39 | 40 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) 41 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) 42 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) 43 | n.pool5 = max_pool(n.relu5_3) 44 | 45 | # fully conv 46 | n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) 47 | n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) 48 | n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) 49 | n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) 50 | 51 | n.score_fr_sem = L.Convolution(n.drop7, num_output=33, kernel_size=1, pad=0, 52 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 53 | n.upscore2_sem = L.Deconvolution(n.score_fr_sem, 54 | convolution_param=dict(num_output=33, kernel_size=4, stride=2, 55 | bias_term=False), 56 | param=[dict(lr_mult=0)]) 57 | 58 | n.score_pool4_sem = L.Convolution(n.pool4, num_output=33, kernel_size=1, pad=0, 59 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 60 | n.score_pool4_semc = crop(n.score_pool4_sem, n.upscore2_sem) 61 | n.fuse_pool4_sem = L.Eltwise(n.upscore2_sem, n.score_pool4_semc, 62 | operation=P.Eltwise.SUM) 63 | n.upscore16_sem = L.Deconvolution(n.fuse_pool4_sem, 64 | convolution_param=dict(num_output=33, kernel_size=32, stride=16, 65 | bias_term=False), 66 | param=[dict(lr_mult=0)]) 67 | 68 | n.score_sem = crop(n.upscore16_sem, n.data) 69 | # loss to make score happy (o.w. 
loss_sem) 70 | n.loss = L.SoftmaxWithLoss(n.score_sem, n.sem, 71 | loss_param=dict(normalize=False, ignore_label=255)) 72 | 73 | n.score_fr_geo = L.Convolution(n.drop7, num_output=3, kernel_size=1, pad=0, 74 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 75 | 76 | n.upscore2_geo = L.Deconvolution(n.score_fr_geo, 77 | convolution_param=dict(num_output=3, kernel_size=4, stride=2, 78 | bias_term=False), 79 | param=[dict(lr_mult=0)]) 80 | 81 | n.score_pool4_geo = L.Convolution(n.pool4, num_output=3, kernel_size=1, pad=0, 82 | param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) 83 | n.score_pool4_geoc = crop(n.score_pool4_geo, n.upscore2_geo) 84 | n.fuse_pool4_geo = L.Eltwise(n.upscore2_geo, n.score_pool4_geoc, 85 | operation=P.Eltwise.SUM) 86 | n.upscore16_geo = L.Deconvolution(n.fuse_pool4_geo, 87 | convolution_param=dict(num_output=3, kernel_size=32, stride=16, 88 | bias_term=False), 89 | param=[dict(lr_mult=0)]) 90 | 91 | n.score_geo = crop(n.upscore16_geo, n.data) 92 | n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo, 93 | loss_param=dict(normalize=False, ignore_label=255)) 94 | 95 | return n.to_proto() 96 | 97 | def make_net(): 98 | with open('trainval.prototxt', 'w') as f: 99 | f.write(str(fcn('trainval'))) 100 | 101 | with open('test.prototxt', 'w') as f: 102 | f.write(str(fcn('test'))) 103 | 104 | if __name__ == '__main__': 105 | make_net() 106 | -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/siftflow_layers.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | 3 | import numpy as np 4 | from PIL import Image 5 | import scipy.io 6 | 7 | import random 8 | 9 | class SIFTFlowSegDataLayer(caffe.Layer): 10 | """ 11 | Load (input image, label image) pairs from SIFT Flow 12 | one-at-a-time while reshaping the net to preserve dimensions. 13 | 14 | This data layer has three tops: 15 | 16 | 1. the data, pre-processed 17 | 2. the semantic labels 0-32 and void 255 18 | 3. the geometric labels 0-2 and void 255 19 | 20 | Use this to feed data to a fully convolutional network. 21 | """ 22 | 23 | def setup(self, bottom, top): 24 | """ 25 | Setup data layer according to parameters: 26 | 27 | - siftflow_dir: path to SIFT Flow dir 28 | - split: train / val / test 29 | - randomize: load in random order (default: True) 30 | - seed: seed for randomization (default: None / current time) 31 | 32 | for semantic segmentation of object and geometric classes. 
33 | 34 | example: params = dict(siftflow_dir="/path/to/siftflow", split="val") 35 | """ 36 | # config 37 | params = eval(self.param_str) 38 | self.siftflow_dir = params['siftflow_dir'] 39 | self.split = params['split'] 40 | self.mean = np.array((114.578, 115.294, 108.353), dtype=np.float32) 41 | self.random = params.get('randomize', True) 42 | self.seed = params.get('seed', None) 43 | 44 | # three tops: data, semantic, geometric 45 | if len(top) != 3: 46 | raise Exception("Need to define three tops: data, semantic label, and geometric label.") 47 | # data layers have no bottoms 48 | if len(bottom) != 0: 49 | raise Exception("Do not define a bottom.") 50 | 51 | # load indices for images and labels 52 | split_f = '{}/{}.txt'.format(self.siftflow_dir, self.split) 53 | self.indices = open(split_f, 'r').read().splitlines() 54 | self.idx = 0 55 | 56 | # make eval deterministic 57 | if 'train' not in self.split: 58 | self.random = False 59 | 60 | # randomization: seed and pick 61 | if self.random: 62 | random.seed(self.seed) 63 | self.idx = random.randint(0, len(self.indices)-1) 64 | 65 | def reshape(self, bottom, top): 66 | # load image + label image pair 67 | self.data = self.load_image(self.indices[self.idx]) 68 | self.label_semantic = self.load_label(self.indices[self.idx], label_type='semantic') 69 | self.label_geometric = self.load_label(self.indices[self.idx], label_type='geometric') 70 | # reshape tops to fit (leading 1 is for batch dimension) 71 | top[0].reshape(1, *self.data.shape) 72 | top[1].reshape(1, *self.label_semantic.shape) 73 | top[2].reshape(1, *self.label_geometric.shape) 74 | 75 | def forward(self, bottom, top): 76 | # assign output 77 | top[0].data[...] = self.data 78 | top[1].data[...] = self.label_semantic 79 | top[2].data[...] = self.label_geometric 80 | 81 | # pick next input 82 | if self.random: 83 | self.idx = random.randint(0, len(self.indices)-1) 84 | else: 85 | self.idx += 1 86 | if self.idx == len(self.indices): 87 | self.idx = 0 88 | 89 | def backward(self, top, propagate_down, bottom): 90 | pass 91 | 92 | def load_image(self, idx): 93 | """ 94 | Load input image and preprocess for Caffe: 95 | - cast to float 96 | - switch channels RGB -> BGR 97 | - subtract mean 98 | - transpose to channel x height x width order 99 | """ 100 | im = Image.open('{}/Images/spatial_envelope_256x256_static_8outdoorcategories/{}.jpg'.format(self.siftflow_dir, idx)) 101 | in_ = np.array(im, dtype=np.float32) 102 | in_ = in_[:,:,::-1] 103 | in_ -= self.mean 104 | in_ = in_.transpose((2,0,1)) 105 | return in_ 106 | 107 | def load_label(self, idx, label_type=None): 108 | """ 109 | Load label image as 1 x height x width integer array of label indices. 110 | The leading singleton dimension is required by the loss. 111 | """ 112 | if label_type == 'semantic': 113 | label = scipy.io.loadmat('{}/SemanticLabels/spatial_envelope_256x256_static_8outdoorcategories/{}.mat'.format(self.siftflow_dir, idx))['S'] 114 | elif label_type == 'geometric': 115 | label = scipy.io.loadmat('{}/GeoLabels/spatial_envelope_256x256_static_8outdoorcategories/{}.mat'.format(self.siftflow_dir, idx))['S'] 116 | label[label == -1] = 0 117 | else: 118 | raise Exception("Unknown label type: {}. Pick semantic or geometric.".format(label_type)) 119 | label = label.astype(np.uint8) 120 | label -= 1 # rotate labels so classes start at 0, void is 255 121 | label = label[np.newaxis, ...] 
122 | return label.copy() 123 | -------------------------------------------------------------------------------- /models/FCNGCN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.nn.init as init 5 | import torch.utils.model_zoo as model_zoo 6 | from torchvision import models 7 | 8 | import math 9 | 10 | 11 | class GCN(nn.Module): 12 | def __init__(self, inplanes, planes, ks=7): 13 | super(GCN, self).__init__() 14 | self.conv_l1 = nn.Conv2d(inplanes, planes, kernel_size=(ks, 1), 15 | padding=(ks/2, 0)) 16 | 17 | self.conv_l2 = nn.Conv2d(planes, planes, kernel_size=(1, ks), 18 | padding=(0, ks/2)) 19 | self.conv_r1 = nn.Conv2d(inplanes, planes, kernel_size=(1, ks), 20 | padding=(0, ks/2)) 21 | self.conv_r2 = nn.Conv2d(planes, planes, kernel_size=(ks, 1), 22 | padding=(ks/2, 0)) 23 | 24 | def forward(self, x): 25 | x_l = self.conv_l1(x) 26 | x_l = self.conv_l2(x_l) 27 | 28 | x_r = self.conv_r1(x) 29 | x_r = self.conv_r2(x_r) 30 | 31 | x = x_l + x_r 32 | 33 | return x 34 | 35 | 36 | class Refine(nn.Module): 37 | def __init__(self, planes): 38 | super(Refine, self).__init__() 39 | self.bn = nn.BatchNorm2d(planes) 40 | self.relu = nn.ReLU(inplace=True) 41 | self.conv1 = nn.Conv2d(planes, planes, kernel_size=3, padding=1) 42 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1) 43 | 44 | def forward(self, x): 45 | residual = x 46 | x = self.bn(x) 47 | x = self.relu(x) 48 | x = self.conv1(x) 49 | x = self.bn(x) 50 | x = self.relu(x) 51 | x = self.conv2(x) 52 | 53 | out = residual + x 54 | return out 55 | 56 | 57 | class FCNGCN(nn.Module): 58 | def __init__(self, num_input_chanel,num_classes): 59 | super(FCNGCN, self).__init__() 60 | 61 | self.num_classes = num_classes 62 | self.num_input_chanel = num_input_chanel 63 | 64 | resnet = models.resnet50(pretrained=True) 65 | 66 | # self.conv1 = resnet.conv1 67 | self.conv1 = nn.Conv2d(num_input_chanel, 64, kernel_size=7, stride=2, padding=3, 68 | bias=False) 69 | self.bn0 = resnet.bn1 70 | self.relu = resnet.relu 71 | self.maxpool = resnet.maxpool 72 | 73 | self.layer1 = resnet.layer1 74 | self.layer2 = resnet.layer2 75 | self.layer3 = resnet.layer3 76 | self.layer4 = resnet.layer4 77 | 78 | self.gcn1 = GCN(2048, self.num_classes) 79 | self.gcn2 = GCN(1024, self.num_classes) 80 | self.gcn3 = GCN(512, self.num_classes) 81 | self.gcn4 = GCN(64, self.num_classes) 82 | self.gcn5 = GCN(64, self.num_classes) 83 | 84 | self.refine1 = Refine(self.num_classes) 85 | self.refine2 = Refine(self.num_classes) 86 | self.refine3 = Refine(self.num_classes) 87 | self.refine4 = Refine(self.num_classes) 88 | self.refine5 = Refine(self.num_classes) 89 | self.refine6 = Refine(self.num_classes) 90 | self.refine7 = Refine(self.num_classes) 91 | self.refine8 = Refine(self.num_classes) 92 | self.refine9 = Refine(self.num_classes) 93 | self.refine10 = Refine(self.num_classes) 94 | 95 | self.out0 = self._classifier(2048) 96 | self.out1 = self._classifier(1024) 97 | self.out2 = self._classifier(512) 98 | self.out_e = self._classifier(256) 99 | self.out3 = self._classifier(64) 100 | self.out4 = self._classifier(64) 101 | self.out5 = self._classifier(32) 102 | 103 | self.transformer = nn.Conv2d(256, 64, kernel_size=1) 104 | 105 | def _classifier(self, inplanes): 106 | return nn.Sequential( 107 | nn.Conv2d(inplanes, inplanes/2, 3, padding=1, bias=False),  # reduce to inplanes/2 channels so the BatchNorm2d and the final 1x1 conv below agree 108 | nn.BatchNorm2d(inplanes/2), 109 | nn.ReLU(inplace=True), 110 | nn.Dropout(.1), 111 |
nn.Conv2d(inplanes/2, self.num_classes, 1), 112 | ) 113 | 114 | def forward(self, x): 115 | input = x 116 | x = self.conv1(x) 117 | x = self.bn0(x) 118 | x = self.relu(x) 119 | conv_x = x 120 | x = self.maxpool(x) 121 | pool_x = x 122 | 123 | fm1 = self.layer1(x) 124 | fm2 = self.layer2(fm1) 125 | fm3 = self.layer3(fm2) 126 | fm4 = self.layer4(fm3) 127 | 128 | gcfm1 = self.refine1(self.gcn1(fm4)) 129 | gcfm2 = self.refine2(self.gcn2(fm3)) 130 | gcfm3 = self.refine3(self.gcn3(fm2)) 131 | gcfm4 = self.refine4(self.gcn4(pool_x)) 132 | gcfm5 = self.refine5(self.gcn5(conv_x)) 133 | 134 | fs1 = self.refine6(F.upsample_bilinear(gcfm1, fm3.size()[2:]) + gcfm2) 135 | fs2 = self.refine7(F.upsample_bilinear(fs1, fm2.size()[2:]) + gcfm3) 136 | fs3 = self.refine8(F.upsample_bilinear(fs2, pool_x.size()[2:]) + gcfm4) 137 | fs4 = self.refine9(F.upsample_bilinear(fs3, conv_x.size()[2:]) + gcfm5) 138 | out = self.refine10(F.upsample_bilinear(fs4, input.size()[2:])) 139 | 140 | return out 141 | 142 | # return out, fs4, fs3, fs2, fs1, gcfm1 -------------------------------------------------------------------------------- /torchsrc/ext/fcn.berkeleyvision.org/pascalcontext_layers.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | 3 | import numpy as np 4 | from PIL import Image 5 | import scipy.io 6 | 7 | import random 8 | 9 | class PASCALContextSegDataLayer(caffe.Layer): 10 | """ 11 | Load (input image, label image) pairs from PASCAL-Context 12 | one-at-a-time while reshaping the net to preserve dimensions. 13 | 14 | The labels follow the 59 class task defined by 15 | 16 | R. Mottaghi, X. Chen, X. Liu, N.-G. Cho, S.-W. Lee, S. Fidler, R. 17 | Urtasun, and A. Yuille. The Role of Context for Object Detection and 18 | Semantic Segmentation in the Wild. CVPR 2014. 19 | 20 | Use this to feed data to a fully convolutional network. 21 | """ 22 | 23 | def setup(self, bottom, top): 24 | """ 25 | Setup data layer according to parameters: 26 | 27 | - voc_dir: path to PASCAL VOC dir (must contain 2010) 28 | - context_dir: path to PASCAL-Context annotations 29 | - split: train / val / test 30 | - randomize: load in random order (default: True) 31 | - seed: seed for randomization (default: None / current time) 32 | 33 | for PASCAL-Context semantic segmentation. 
34 | 35 | example: params = dict(voc_dir="/path/to/PASCAL", split="val") 36 | """ 37 | # config 38 | params = eval(self.param_str) 39 | self.voc_dir = params['voc_dir'] + '/VOC2010' 40 | self.context_dir = params['context_dir'] 41 | self.split = params['split'] 42 | self.mean = np.array((104.007, 116.669, 122.679), dtype=np.float32) 43 | self.random = params.get('randomize', True) 44 | self.seed = params.get('seed', None) 45 | 46 | # load labels and resolve inconsistencies by mapping to full 400 labels 47 | self.labels_400 = [label.replace(' ','') for idx, label in np.genfromtxt(self.context_dir + '/labels.txt', delimiter=':', dtype=None)] 48 | self.labels_59 = [label.replace(' ','') for idx, label in np.genfromtxt(self.context_dir + '/59_labels.txt', delimiter=':', dtype=None)] 49 | for main_label, task_label in zip(('table', 'bedclothes', 'cloth'), ('diningtable', 'bedcloth', 'clothes')): 50 | self.labels_59[self.labels_59.index(task_label)] = main_label 51 | 52 | # two tops: data and label 53 | if len(top) != 2: 54 | raise Exception("Need to define two tops: data and label.") 55 | # data layers have no bottoms 56 | if len(bottom) != 0: 57 | raise Exception("Do not define a bottom.") 58 | 59 | # load indices for images and labels 60 | split_f = '{}/ImageSets/Main/{}.txt'.format(self.voc_dir, 61 | self.split) 62 | self.indices = open(split_f, 'r').read().splitlines() 63 | self.idx = 0 64 | 65 | # make eval deterministic 66 | if 'train' not in self.split: 67 | self.random = False 68 | 69 | # randomization: seed and pick 70 | if self.random: 71 | random.seed(self.seed) 72 | self.idx = random.randint(0, len(self.indices)-1) 73 | 74 | def reshape(self, bottom, top): 75 | # load image + label image pair 76 | self.data = self.load_image(self.indices[self.idx]) 77 | self.label = self.load_label(self.indices[self.idx]) 78 | # reshape tops to fit (leading 1 is for batch dimension) 79 | top[0].reshape(1, *self.data.shape) 80 | top[1].reshape(1, *self.label.shape) 81 | 82 | def forward(self, bottom, top): 83 | # assign output 84 | top[0].data[...] = self.data 85 | top[1].data[...] = self.label 86 | 87 | # pick next input 88 | if self.random: 89 | self.idx = random.randint(0, len(self.indices)-1) 90 | else: 91 | self.idx += 1 92 | if self.idx == len(self.indices): 93 | self.idx = 0 94 | 95 | def backward(self, top, propagate_down, bottom): 96 | pass 97 | 98 | def load_image(self, idx): 99 | """ 100 | Load input image and preprocess for Caffe: 101 | - cast to float 102 | - switch channels RGB -> BGR 103 | - subtract mean 104 | - transpose to channel x height x width order 105 | """ 106 | im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx)) 107 | in_ = np.array(im, dtype=np.float32) 108 | in_ = in_[:,:,::-1] 109 | in_ -= self.mean 110 | in_ = in_.transpose((2,0,1)) 111 | return in_ 112 | 113 | def load_label(self, idx): 114 | """ 115 | Load label image as 1 x height x width integer array of label indices. 116 | The leading singleton dimension is required by the loss. 117 | The full 400 labels are translated to the 59 class task labels. 118 | """ 119 | label_400 = scipy.io.loadmat('{}/trainval/{}.mat'.format(self.context_dir, idx))['LabelMap'] 120 | label = np.zeros_like(label_400, dtype=np.uint8) 121 | for idx, l in enumerate(self.labels_59): 122 | idx_400 = self.labels_400.index(l) + 1 123 | label[label_400 == idx_400] = idx + 1 124 | label = label[np.newaxis, ...] 125 | return label 126 | --------------------------------------------------------------------------------
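Usage note (not part of the repository): a minimal sketch of how a yh_seg_dataset-style sample could be pushed through models/FCNGCN.py, assuming the Python 2 / pre-1.0 PyTorch stack the code above targets (transforms.Scale, F.upsample_bilinear, integer division in the GCN padding). The tensors, sizes, and the 7-class assumption (matching output_nc_seg as used in data/yh_seg_dataset.py) are illustrative only, not values read from the repository's options.

import torch
from torch.autograd import Variable  # required pre-0.4; a no-op wrapper on newer PyTorch

from models.FCNGCN import FCNGCN

# Stand-in for one sample from yhSegDataset: a normalized grayscale slice and
# an integer organ-label map with values in {0, ..., 6} (hypothetical data).
x = torch.randn(1, 1, 256, 256)
label = (torch.rand(1, 256, 256) * 7).long()

# One-hot encoding of the label map, mirroring the Seg_imgs construction in
# data/yh_seg_dataset.py (one channel per class).
onehot = torch.zeros(1, 7, 256, 256)
for c in range(7):
    onehot[:, c, :, :] = (label == c).float()

# FCNGCN(num_input_chanel, num_classes) returns a per-class score map at the
# input resolution; models.resnet50(pretrained=True) downloads weights on first use.
model = FCNGCN(num_input_chanel=1, num_classes=7)
scores = model(Variable(x))
print(scores.size())  # expected: (1, 7, 256, 256)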