├── lib
│   ├── __init__.py
│   ├── nn
│   │   ├── __init__.py
│   │   ├── parallel
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   ├── __init__.cpython-37.pyc
│   │   │   │   ├── data_parallel.cpython-35.pyc
│   │   │   │   └── data_parallel.cpython-37.pyc
│   │   │   └── data_parallel.py
│   │   ├── sync_batchnorm
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── comm.cpython-35.pyc
│   │   │   │   ├── comm.cpython-37.pyc
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   ├── __init__.cpython-37.pyc
│   │   │   │   ├── batchnorm.cpython-35.pyc
│   │   │   │   ├── batchnorm.cpython-37.pyc
│   │   │   │   ├── replicate.cpython-35.pyc
│   │   │   │   └── replicate.cpython-37.pyc
│   │   │   ├── unit_test.py
│   │   │   ├── tests
│   │   │   │   ├── test_numeric_batchnorm.py
│   │   │   │   └── test_sync_batchnorm.py
│   │   │   ├── replicate.py
│   │   │   └── comm.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── optimizer.cpython-35.pyc
│   │   │   └── optimizer.cpython-37.pyc
│   │   └── optimizer.py
│   └── __pycache__
│       ├── __init__.cpython-35.pyc
│       └── __init__.cpython-37.pyc
├── spml
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── default.cpython-35.pyc
│   │   │   ├── default.cpython-37.pyc
│   │   │   ├── parse_args.cpython-35.pyc
│   │   │   └── parse_args.cpython-37.pyc
│   │   ├── parse_args.py
│   │   └── default.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   ├── __init__.cpython-37.pyc
│   │   │   │   ├── base_dataset.cpython-35.pyc
│   │   │   │   ├── base_dataset.cpython-37.pyc
│   │   │   │   ├── coco_dataset.cpython-35.pyc
│   │   │   │   ├── densepose_dataset.cpython-35.pyc
│   │   │   │   ├── densepose_dataset.cpython-37.pyc
│   │   │   │   ├── list_tag_dataset.cpython-35.pyc
│   │   │   │   └── list_tag_dataset.cpython-37.pyc
│   │   │   ├── densepose_dataset.py
│   │   │   └── base_dataset.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── transforms.cpython-35.pyc
│   │   │   └── transforms.cpython-37.pyc
│   │   └── transforms.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── resnet.cpython-35.pyc
│   │   │   │   ├── resnet.cpython-37.pyc
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   ├── __init__.cpython-37.pyc
│   │   │   │   ├── resnet_fpn.cpython-35.pyc
│   │   │   │   └── resnet_caffe.cpython-35.pyc
│   │   │   └── resnet.py
│   │   ├── heads
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── fcn.cpython-35.pyc
│   │   │   │   ├── fpn.cpython-35.pyc
│   │   │   │   ├── spp.cpython-35.pyc
│   │   │   │   ├── spp.cpython-37.pyc
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   └── __init__.cpython-37.pyc
│   │   │   └── spp.py
│   │   ├── embeddings
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   ├── __init__.cpython-37.pyc
│   │   │   │   ├── base_model.cpython-35.pyc
│   │   │   │   ├── base_model.cpython-37.pyc
│   │   │   │   ├── resnet_fcn.cpython-35.pyc
│   │   │   │   ├── resnet_fcn.cpython-37.pyc
│   │   │   │   ├── resnet_fpn.cpython-35.pyc
│   │   │   │   ├── local_model.cpython-35.pyc
│   │   │   │   ├── local_model.cpython-37.pyc
│   │   │   │   ├── resnet_pspnet.cpython-35.pyc
│   │   │   │   ├── resnet_pspnet.cpython-37.pyc
│   │   │   │   ├── resnet_deeplab.cpython-35.pyc
│   │   │   │   ├── resnet_deeplab.cpython-37.pyc
│   │   │   │   ├── resnet_deeplab_caffe.cpython-35.pyc
│   │   │   │   ├── resnet_pspnet_densepose.cpython-35.pyc
│   │   │   │   └── resnet_pspnet_densepose.cpython-37.pyc
│   │   │   ├── base_model.py
│   │   │   └── local_model.py
│   │   ├── predictions
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── segsort.cpython-35.pyc
│   │   │   │   ├── segsort.cpython-37.pyc
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   ├── __init__.cpython-37.pyc
│   │   │   │   ├── pixel_model.cpython-35.pyc
│   │   │   │   ├── segsort_cam.cpython-35.pyc
│   │   │   │   ├── segsort_cam.cpython-37.pyc
│   │   │   │   ├── segsort_wsup.cpython-35.pyc
│   │   │   │   ├── segsort_densepose.cpython-35.pyc
│   │   │   │   ├── segsort_softmax.cpython-35.pyc
│   │   │   │   ├── segsort_softmax.cpython-37.pyc
│   │   │   │   ├── softmax_classifier.cpython-35.pyc
│   │   │   │   └── softmax_classifier.cpython-37.pyc
│   │   │   └── softmax_classifier.py
│   │   ├── __pycache__
│   │   │   ├── crf.cpython-35.pyc
│   │   │   ├── crf.cpython-37.pyc
│   │   │   ├── utils.cpython-35.pyc
│   │   │   ├── utils.cpython-37.pyc
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   └── __init__.cpython-37.pyc
│   │   └── crf.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── general
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── vis.cpython-35.pyc
│   │   │   │   ├── vis.cpython-37.pyc
│   │   │   │   ├── common.cpython-35.pyc
│   │   │   │   ├── common.cpython-37.pyc
│   │   │   │   ├── others.cpython-35.pyc
│   │   │   │   ├── others.cpython-37.pyc
│   │   │   │   ├── train.cpython-35.pyc
│   │   │   │   ├── train.cpython-37.pyc
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   └── __init__.cpython-37.pyc
│   │   │   ├── train.py
│   │   │   ├── others.py
│   │   │   ├── vis.py
│   │   │   └── common.py
│   │   ├── segsort
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── eval.cpython-35.pyc
│   │   │   │   ├── eval.cpython-37.pyc
│   │   │   │   ├── loss.cpython-35.pyc
│   │   │   │   ├── loss.cpython-37.pyc
│   │   │   │   ├── common.cpython-35.pyc
│   │   │   │   ├── common.cpython-37.pyc
│   │   │   │   ├── others.cpython-35.pyc
│   │   │   │   ├── others.cpython-37.pyc
│   │   │   │   ├── train.cpython-35.pyc
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   └── __init__.cpython-37.pyc
│   │   │   ├── others.py
│   │   │   └── eval.py
│   │   └── __pycache__
│   │       ├── __init__.cpython-35.pyc
│   │       └── __init__.cpython-37.pyc
│   └── __pycache__
│       ├── __init__.cpython-35.pyc
│       └── __init__.cpython-37.pyc
├── misc
│   ├── main.png
│   ├── teaser.png
│   └── colormapvoc.mat
├── requirements.txt
├── LICENSE
├── configs
│   ├── voc12_template.yaml
│   └── densepose_template.yaml
├── our_requirements.txt
└── pyscripts
    ├── benchmark
    │   ├── benchmark_by_mIoU.py
    │   └── benchmark_by_instance.py
    ├── inference
    │   ├── inference_softmax.py
    │   ├── inference_softmax_msc.py
    │   ├── inference_softmax_crf_msc.py
    │   ├── inference_softmax_crf.py
    │   ├── pseudo_camrw_crf.py
    │   ├── pseudo_softmaxrw_crf.py
    │   ├── prototype_msc.py
    │   └── pseudo_softmax.py
    └── train
        ├── train_densepose_classifier.py
        └── train_classifier.py

--------------------------------------------------------------------------------
The following files are empty package markers (a single blank line each):
/lib/__init__.py, /lib/nn/__init__.py, /lib/nn/parallel/__init__.py,
/lib/nn/sync_batchnorm/__init__.py, /spml/__init__.py, /spml/config/__init__.py,
/spml/data/__init__.py, /spml/data/datasets/__init__.py, /spml/models/__init__.py,
/spml/models/backbones/__init__.py, /spml/models/heads/__init__.py,
/spml/models/embeddings/__init__.py, /spml/models/predictions/__init__.py,
/spml/utils/__init__.py, /spml/utils/general/__init__.py,
/spml/utils/segsort/__init__.py
--------------------------------------------------------------------------------
/misc/main.png: https://raw.githubusercontent.com/twke18/SPML/HEAD/misc/main.png
/misc/teaser.png: https://raw.githubusercontent.com/twke18/SPML/HEAD/misc/teaser.png
/misc/colormapvoc.mat: https://raw.githubusercontent.com/twke18/SPML/HEAD/misc/colormapvoc.mat
--------------------------------------------------------------------------------
__pycache__/*.pyc (throughout lib/ and spml/, listed in the tree above): compiled
CPython 3.5/3.7 bytecode checked into the repository; binary content hosted at
https://raw.githubusercontent.com/twke18/SPML/HEAD/<path-to-pyc>.
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
pytorch >= 1.6   # installable from PyPI as `torch`
numpy
scipy
tqdm
easydict == 1.9
PyYAML
PIL              # installable from PyPI as `Pillow`
opencv           # installable from PyPI as `opencv-python`
pydensecrf
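Note that several entries above are import-oriented names rather than exact PyPI
distribution names. A minimal sanity check of an installed environment, assuming
the PyPI names noted in the comments (this snippet is illustrative and not part
of the repository):

import importlib

# Module names as imported in the code base; the PyPI distributions differ
# for torch (pytorch), PIL (Pillow) and cv2 (opencv-python).
for module in ['torch', 'numpy', 'scipy', 'tqdm', 'easydict',
               'yaml', 'PIL', 'cv2', 'pydensecrf.densecrf']:
    importlib.import_module(module)
    print('ok:', module)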
--------------------------------------------------------------------------------
/lib/nn/sync_batchnorm/unit_test.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# File   : unit_test.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 27/01/2018
#
# This file is part of Synchronized-BatchNorm-PyTorch.
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
# Distributed under MIT License.

import unittest

import numpy as np
from torch.autograd import Variable


def as_numpy(v):
    if isinstance(v, Variable):
        v = v.data
    return v.cpu().numpy()


class TorchTestCase(unittest.TestCase):
    def assertTensorClose(self, a, b, atol=1e-3, rtol=1e-3):
        npa, npb = as_numpy(a), as_numpy(b)
        self.assertTrue(
            np.allclose(npa, npb, atol=atol),
            'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format(
                a, b,
                np.abs(npa - npb).max(),
                np.abs((npa - npb) / np.fmax(npa, 1e-5)).max())
        )
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 Tsung-Wei Ke, Jyh-Jing Hwang and Stella X. Yu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/spml/models/crf.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
#
# Author: Kazuto Nakashima
# URL:    https://kazuto1011.github.io
# Date:   09 January 2019


import numpy as np
import pydensecrf.densecrf as dcrf
import pydensecrf.utils as utils


class DenseCRF(object):
    def __init__(self, iter_max, pos_w, pos_xy_std, bi_w, bi_xy_std, bi_rgb_std):
        self.iter_max = iter_max
        self.pos_w = pos_w
        self.pos_xy_std = pos_xy_std
        self.bi_w = bi_w
        self.bi_xy_std = bi_xy_std
        self.bi_rgb_std = bi_rgb_std

    def __call__(self, image, probmap):
        C, H, W = probmap.shape

        U = utils.unary_from_softmax(probmap)
        U = np.ascontiguousarray(U)

        image = np.ascontiguousarray(image)

        d = dcrf.DenseCRF2D(W, H, C)
        d.setUnaryEnergy(U)
        d.addPairwiseGaussian(sxy=self.pos_xy_std, compat=self.pos_w)
        d.addPairwiseBilateral(
            sxy=self.bi_xy_std, srgb=self.bi_rgb_std, rgbim=image, compat=self.bi_w
        )

        Q = d.inference(self.iter_max)
        Q = np.array(Q).reshape((C, H, W))

        return Q
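The --crf_* defaults in /spml/config/parse_args.py (further below) give one
concrete parameterization of DenseCRF. A minimal usage sketch, assuming an RGB
uint8 image of shape [H, W, 3] and a softmax probability map of shape [C, H, W]
(the arrays here are dummies, not real model outputs):

import numpy as np

from spml.models.crf import DenseCRF

image = np.zeros((240, 320, 3), dtype=np.uint8)                # RGB input image
probmap = np.full((21, 240, 320), 1.0 / 21, dtype=np.float32)  # per-class softmax

# Values mirror the --crf_* defaults in /spml/config/parse_args.py.
crf = DenseCRF(iter_max=10, pos_w=3, pos_xy_std=1,
               bi_w=4, bi_xy_std=67, bi_rgb_std=3)
refined = crf(image, probmap)           # [C, H, W] refined probabilities
labelmap = np.argmax(refined, axis=0)   # [H, W] hard label map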
--------------------------------------------------------------------------------
/spml/utils/segsort/others.py:
--------------------------------------------------------------------------------
"""Utility functions.
"""

import os
import glob

import numpy as np
import torch


def load_memory_banks(memory_dir):
  """Return prototypes and labels saved in the directory.

  Args:
    memory_dir: A string indicates the directory where
      prototypes are stored.
      The dir layout should look like:
        memory_dir --- prototype_1.npy
                    |- prototype_2.npy

  Returns:
    A 2-D float tensor of shape `[num_prototypes, num_channels]`;
    A 1-D long tensor of shape `[num_prototypes]`.
  """
  memory_paths = sorted(glob.glob(os.path.join(memory_dir, '*.npy')))
  assert len(memory_paths) > 0, 'No memory stored in the directory'

  prototypes, prototype_labels = [], []
  for memory_path in memory_paths:
    datas = np.load(memory_path, allow_pickle=True).item()
    prototypes.append(datas['prototype'])
    prototype_labels.append(datas['prototype_label'])

  prototypes = np.concatenate(prototypes, 0)
  prototype_labels = np.concatenate(prototype_labels, 0)

  prototypes = torch.FloatTensor(prototypes)
  prototype_labels = torch.LongTensor(prototype_labels)

  return prototypes, prototype_labels
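A hypothetical round trip through load_memory_banks(): write one memory-bank
file in the dict layout the loader expects, then read it back. The directory
path and array shapes are made up for illustration:

import os

import numpy as np

from spml.utils.segsort.others import load_memory_banks

# One .npy file per bank, each holding a pickled dict with the two keys
# that load_memory_banks() reads.
memory_dir = '/tmp/spml_memory'
os.makedirs(memory_dir, exist_ok=True)
np.save(os.path.join(memory_dir, 'prototype_1.npy'),
        {'prototype': np.random.rand(8, 32).astype(np.float32),
         'prototype_label': np.arange(8)})

prototypes, prototype_labels = load_memory_banks(memory_dir)
print(prototypes.shape, prototype_labels.shape)
# torch.Size([8, 32]) torch.Size([8])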
"misc/colormapvoc.mat" 12 | 13 | network: 14 | pretrained: "PRETRAINED" 15 | embedding_dim: EMBEDDING_DIM 16 | label_divisor: LABEL_DIVISOR 17 | use_syncbn: USE_SYNCBN 18 | kmeans_iterations: KMEANS_ITERATIONS 19 | kmeans_num_clusters: 20 | - KMEANS_NUM_CLUSTERS 21 | - KMEANS_NUM_CLUSTERS 22 | backbone_types: BACKBONE_TYPES 23 | prediction_types: PREDICTION_TYPES 24 | 25 | train: 26 | resume: false 27 | lr_policy: LR_POLICY 28 | begin_iteration: 0 29 | snapshot_step: SNAPSHOT_STEP 30 | tensorboard_step: 100 31 | max_iteration: MAX_ITERATION 32 | random_mirror: true 33 | random_scale: true 34 | random_crop: true 35 | warmup_iteration: WARMUP_ITERATION 36 | base_lr: LR 37 | weight_decay: WD 38 | momentum: 0.9 39 | batch_size: BATCH_SIZE 40 | crop_size: 41 | - TRAIN_CROP_SIZE 42 | - TRAIN_CROP_SIZE 43 | memory_bank_size: MEMORY_BANK_SIZE 44 | sem_ann_concentration: SEM_ANN_CONCENTRATION 45 | sem_occ_concentration: SEM_OCC_CONCENTRATION 46 | img_sim_concentration: IMG_SIM_CONCENTRATION 47 | feat_aff_concentration: FEAT_AFF_CONCENTRATION 48 | sem_ann_loss_types: SEM_ANN_LOSS_TYPES 49 | sem_occ_loss_types: SEM_OCC_LOSS_TYPES 50 | img_sim_loss_types: IMG_SIM_LOSS_TYPES 51 | feat_aff_loss_types: FEAT_AFF_LOSS_TYPES 52 | sem_ann_loss_weight: SEM_ANN_LOSS_WEIGHT 53 | sem_occ_loss_weight: SEM_OCC_LOSS_WEIGHT 54 | img_sim_loss_weight: IMG_SIM_LOSS_WEIGHT 55 | feat_aff_loss_weight: FEAT_AFF_LOSS_WEIGHT 56 | 57 | test: 58 | scales: 59 | - 1 60 | image_size: TEST_IMAGE_SIZE 61 | crop_size: 62 | - TEST_CROP_SIZE_H 63 | - TEST_CROP_SIZE_W 64 | stride: 65 | - TEST_STRIDE 66 | - TEST_STRIDE 67 | -------------------------------------------------------------------------------- /configs/densepose_template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | gpus: "GPUS" 3 | num_threads: 4 4 | dataset: 5 | num_classes: 15 6 | semantic_ignore_index: 255 7 | dataset: Densepose 8 | data_dir: "DATA_ROOT" 9 | train_data_list: "TRAIN_DATA_LIST" 10 | test_data_list: "TEST_DATA_LIST" 11 | color_map_path: "misc/colormapvoc.mat" 12 | 13 | network: 14 | pretrained: "PRETRAINED" 15 | embedding_dim: EMBEDDING_DIM 16 | label_divisor: LABEL_DIVISOR 17 | use_syncbn: USE_SYNCBN 18 | kmeans_iterations: KMEANS_ITERATIONS 19 | kmeans_num_clusters: 20 | - KMEANS_NUM_CLUSTERS 21 | - KMEANS_NUM_CLUSTERS 22 | backbone_types: BACKBONE_TYPES 23 | prediction_types: PREDICTION_TYPES 24 | 25 | train: 26 | resume: false 27 | lr_policy: LR_POLICY 28 | begin_iteration: 0 29 | snapshot_step: SNAPSHOT_STEP 30 | tensorboard_step: 100 31 | max_iteration: MAX_ITERATION 32 | random_mirror: true 33 | random_scale: true 34 | random_crop: true 35 | warmup_iteration: WARMUP_ITERATION 36 | base_lr: LR 37 | weight_decay: WD 38 | momentum: 0.9 39 | batch_size: BATCH_SIZE 40 | crop_size: 41 | - TRAIN_CROP_SIZE 42 | - TRAIN_CROP_SIZE 43 | memory_bank_size: MEMORY_BANK_SIZE 44 | sem_ann_concentration: SEM_ANN_CONCENTRATION 45 | sem_occ_concentration: SEM_OCC_CONCENTRATION 46 | img_sim_concentration: IMG_SIM_CONCENTRATION 47 | feat_aff_concentration: FEAT_AFF_CONCENTRATION 48 | sem_ann_loss_types: SEM_ANN_LOSS_TYPES 49 | sem_occ_loss_types: SEM_OCC_LOSS_TYPES 50 | img_sim_loss_types: IMG_SIM_LOSS_TYPES 51 | feat_aff_loss_types: FEAT_AFF_LOSS_TYPES 52 | sem_ann_loss_weight: SEM_ANN_LOSS_WEIGHT 53 | sem_occ_loss_weight: SEM_OCC_LOSS_WEIGHT 54 | img_sim_loss_weight: IMG_SIM_LOSS_WEIGHT 55 | feat_aff_loss_weight: FEAT_AFF_LOSS_WEIGHT 56 | 57 | test: 58 | scales: 59 | - 1 60 | image_size: TEST_IMAGE_SIZE 61 | crop_size: 
--------------------------------------------------------------------------------
/configs/densepose_template.yaml:
--------------------------------------------------------------------------------
---
gpus: "GPUS"
num_threads: 4
dataset:
  num_classes: 15
  semantic_ignore_index: 255
  dataset: Densepose
  data_dir: "DATA_ROOT"
  train_data_list: "TRAIN_DATA_LIST"
  test_data_list: "TEST_DATA_LIST"
  color_map_path: "misc/colormapvoc.mat"

network:
  pretrained: "PRETRAINED"
  embedding_dim: EMBEDDING_DIM
  label_divisor: LABEL_DIVISOR
  use_syncbn: USE_SYNCBN
  kmeans_iterations: KMEANS_ITERATIONS
  kmeans_num_clusters:
    - KMEANS_NUM_CLUSTERS
    - KMEANS_NUM_CLUSTERS
  backbone_types: BACKBONE_TYPES
  prediction_types: PREDICTION_TYPES

train:
  resume: false
  lr_policy: LR_POLICY
  begin_iteration: 0
  snapshot_step: SNAPSHOT_STEP
  tensorboard_step: 100
  max_iteration: MAX_ITERATION
  random_mirror: true
  random_scale: true
  random_crop: true
  warmup_iteration: WARMUP_ITERATION
  base_lr: LR
  weight_decay: WD
  momentum: 0.9
  batch_size: BATCH_SIZE
  crop_size:
    - TRAIN_CROP_SIZE
    - TRAIN_CROP_SIZE
  memory_bank_size: MEMORY_BANK_SIZE
  sem_ann_concentration: SEM_ANN_CONCENTRATION
  sem_occ_concentration: SEM_OCC_CONCENTRATION
  img_sim_concentration: IMG_SIM_CONCENTRATION
  feat_aff_concentration: FEAT_AFF_CONCENTRATION
  sem_ann_loss_types: SEM_ANN_LOSS_TYPES
  sem_occ_loss_types: SEM_OCC_LOSS_TYPES
  img_sim_loss_types: IMG_SIM_LOSS_TYPES
  feat_aff_loss_types: FEAT_AFF_LOSS_TYPES
  sem_ann_loss_weight: SEM_ANN_LOSS_WEIGHT
  sem_occ_loss_weight: SEM_OCC_LOSS_WEIGHT
  img_sim_loss_weight: IMG_SIM_LOSS_WEIGHT
  feat_aff_loss_weight: FEAT_AFF_LOSS_WEIGHT

test:
  scales:
    - 1
  image_size: TEST_IMAGE_SIZE
  crop_size:
    - TEST_CROP_SIZE_H
    - TEST_CROP_SIZE_W
  stride:
    - TEST_STRIDE
    - TEST_STRIDE
--------------------------------------------------------------------------------
/spml/utils/general/train.py:
--------------------------------------------------------------------------------
"""Utility functions for training.
"""

import torch
import torch.nn.functional as F


def lr_poly(base_lr, curr_iter, max_iter, warmup_iter=0, power=0.9):
  """Polynomial-decay learning rate policy.

  Args:
    base_lr: A scalar indicates initial learning rate.
    curr_iter: A scalar indicates current iteration.
    max_iter: A scalar indicates maximum iteration.
    warmup_iter: A scalar indicates the number of iterations
      before which the learning rate is not adjusted.
    power: A scalar indicates the decay power.

  Returns:
    A scalar indicates the current adjusted learning rate.
  """
  if curr_iter < warmup_iter:
    alpha = curr_iter / warmup_iter
    return min(base_lr * (1 / 10.0 * (1 - alpha) + alpha),
               base_lr * ((1 - float(curr_iter) / max_iter)**(power)))
  return base_lr * ((1 - float(curr_iter) / max_iter)**(power))


def get_step_index(curr_iter, decay_iters):
  """Get step when the learning rate is decayed.
  """
  for idx, decay_iter in enumerate(decay_iters):
    if curr_iter < decay_iter:
      return idx
  return len(decay_iters)


def lr_step(base_lr, curr_iter, decay_iters, warmup_iter=0):
  """Stepwise exponential-decay learning rate policy.

  Args:
    base_lr: A scalar indicates initial learning rate.
    curr_iter: A scalar indicates current iteration.
    decay_iters: A list of scalars indicates the numbers of
      iterations when the learning rate is decayed.
    warmup_iter: A scalar indicates the number of iterations
      before which the learning rate is not adjusted.

  Returns:
    A scalar indicates the current adjusted learning rate.
  """
  if curr_iter < warmup_iter:
    alpha = curr_iter / warmup_iter
    return base_lr * (1 / 10.0 * (1 - alpha) + alpha)
  else:
    return base_lr * (0.1 ** get_step_index(curr_iter, decay_iters))
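A quick numeric check of both policies past their warmup phase (the iteration
counts and rates below are example values, not the repo's defaults):

from spml.utils.general.train import lr_poly, lr_step

# lr_poly: 10k of 100k iterations done, no warmup, power 0.9.
print(lr_poly(base_lr=0.01, curr_iter=10000, max_iter=100000))
# 0.01 * (1 - 0.1) ** 0.9 ≈ 0.0091

# lr_step: decay by 10x at 60k and 80k iterations; 70k sits past one decay.
print(lr_step(base_lr=0.01, curr_iter=70000, decay_iters=[60000, 80000]))
# 0.01 * 0.1 ** 1 = 0.001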
+ name 20 | 21 | return name.replace('layer1', 'resnet_backbone.res2.layers')\ 22 | .replace('layer2', 'resnet_backbone.res3.layers')\ 23 | .replace('layer3', 'resnet_backbone.res4.layers')\ 24 | .replace('layer4', 'resnet_backbone.res5.layers') 25 | 26 | def load_state_dict(self, state_dict, resume=False): 27 | 28 | own_state = self.state_dict() 29 | 30 | for name, param in state_dict.items(): 31 | name = self.name_mapping(name, resume) 32 | 33 | if name not in own_state: 34 | warnings.warn('unexpected key "{}" in state_dict'.format(name)) 35 | continue 36 | 37 | if isinstance(param, Parameter): 38 | # backwards compatibility for serialized parameters 39 | param = param.data 40 | 41 | if own_state[name].shape == param.shape: 42 | own_state[name].copy_(param) 43 | else: 44 | warnings.warn( 45 | 'While copying the parameter named {}, whose dimensions in the ' 46 | 'model are {} and whose dimensions in the checkpoint are {}, ' 47 | '...'.format(name, own_state[name].size(), param.size())) 48 | 49 | missing = (set(own_state.keys()) 50 | - set([self.name_mapping(_, resume) for _ in state_dict.keys()])) 51 | if len(missing) > 0: 52 | warnings.warn('missing keys in state_dict: "{}"'.format(missing)) 53 | 54 | def get_params_lr(self): 55 | 56 | raise NotImplementedError() 57 | -------------------------------------------------------------------------------- /spml/config/parse_args.py: -------------------------------------------------------------------------------- 1 | """Parse CLI arguments.""" 2 | 3 | import argparse 4 | 5 | from spml.config.default import config, update_config 6 | 7 | 8 | def parse_args(description=''): 9 | """Parse CLI arguments. 10 | """ 11 | parser = argparse.ArgumentParser(description=description) 12 | # Misc parameters. 13 | parser.add_argument('--snapshot_dir', required=True, type=str, 14 | help='/path/to/snapshot/dir.') 15 | parser.add_argument('--save_dir', type=str, 16 | help='/path/to/save/dir.') 17 | parser.add_argument('--cfg_path', required=True, type=str, 18 | help='/path/to/specific/config/file.') 19 | parser.add_argument('--semantic_memory_dir', type=str, default=None, 20 | help='/path/to/stored/memory/dir.') 21 | parser.add_argument('--cam_dir', type=str, default=None, 22 | help='/path/to/stored/cam/dir.') 23 | parser.add_argument('--data_dir', type=str, default=None, 24 | help='/root/dir/to/data.') 25 | parser.add_argument('--data_list', type=str, default=None, 26 | help='/path/to/data/list.') 27 | # Network parameters. 28 | parser.add_argument('--kmeans_num_clusters', type=str, 29 | help='number of clusters along height and width, formatted as "H,W".') 30 | parser.add_argument('--label_divisor', type=int, 31 | help='an integer, e.g. 2048.') 32 | # DenseCRF parameters. 33 | parser.add_argument('--crf_iter_max', type=int, default=10, 34 | help='number of iterations for CRF.') 35 | parser.add_argument('--crf_pos_xy_std', type=int, default=1, 36 | help='hyperparameter of CRF.') 37 | parser.add_argument('--crf_pos_w', type=int, default=3, 38 | help='hyperparameter of CRF.') 39 | parser.add_argument('--crf_bi_xy_std', type=int, default=67, 40 | help='hyperparameter of CRF.') 41 | parser.add_argument('--crf_bi_w', type=int, default=4, 42 | help='hyperparameter of CRF.') 43 | parser.add_argument('--crf_bi_rgb_std', type=int, default=3, 44 | help='hyperparameter of CRF.') 45 | 46 | args, rest = parser.parse_known_args() 47 | 48 | # Update config with arguments.
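# Note: `parse_known_args()` above tolerates flags it does not know yet,
# so that `--cfg_path` can be read and the YAML file merged into the
# global `config` before the final, strict `parser.parse_args()` below.
# A sketch of a typical invocation (placeholder paths and values, not
# prescribed by this repo):
#   python pyscripts/train/train_classifier.py \
#     --snapshot_dir snapshots/voc12 \
#     --cfg_path configs/voc12_template.yaml \
#     --kmeans_num_clusters 6,6 --label_divisor 2048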
49 | update_config(args.cfg_path) 50 | 51 | args = parser.parse_args() 52 | 53 | return args 54 | -------------------------------------------------------------------------------- /spml/utils/segsort/eval.py: -------------------------------------------------------------------------------- 1 | """Utility functions for eval. 2 | """ 3 | 4 | import torch 5 | 6 | import spml.utils.general.common as common_utils 7 | 8 | 9 | def top_k_ranking(embeddings, 10 | labels, 11 | prototypes, 12 | prototype_labels, 13 | top_k=3): 14 | """Compute top-k accuracy based on the affinity between 15 | embeddings and prototypes. 16 | 17 | Args: 18 | embeddings: An N-D float tensor with last dimension 19 | as `num_channels`. 20 | labels: An (N-1)-D long tensor. 21 | prototypes: A 2-D float tensor with last dimension as 22 | `num_channels`. 23 | prototype_labels: A 1-D long tensor. 24 | top_k: A scalar indicating the number of top-ranked retrievals. 25 | 26 | Returns: 27 | A float scalar indicating the top-k accuracy; 28 | A 2-D long tensor indicating the retrieved top-k labels. 29 | """ 30 | embeddings = embeddings.view(-1, embeddings.shape[-1]) 31 | prototypes = prototypes.view(-1, prototypes.shape[-1]) 32 | feature_affinity = torch.mm(embeddings, prototypes.t()) 33 | top_k_indices = torch.argsort(feature_affinity, 1, descending=True) 34 | top_k_indices = top_k_indices[:, :top_k].contiguous() 35 | #top_k_indices = top_k_indices[:, 1:top_k+1].contiguous() 36 | 37 | labels = labels.view(-1, 1) 38 | prototype_labels = prototype_labels.view(1, -1) 39 | label_affinity = torch.eq(labels, prototype_labels) 40 | 41 | # Compute top-k accuracy. 42 | top_k_true_positive = torch.gather(label_affinity, 1, top_k_indices) 43 | top_k_accuracy = torch.mean(top_k_true_positive.float()) 44 | 45 | # Retrieve top-k labels. 46 | top_k_labels = torch.gather( 47 | prototype_labels.view(-1), 48 | 0, 49 | top_k_indices.view(-1)) 50 | top_k_labels = top_k_labels.view(-1, top_k) 51 | 52 | return top_k_accuracy, top_k_labels 53 | 54 | 55 | def majority_label_from_topk(top_k_labels, num_classes=None): 56 | """Compute majority label from top-k retrieved labels. 57 | 58 | Args: 59 | top_k_labels: A 2-D long tensor with shape `[num_queries, top_k]`. 60 | num_classes: An integer indicating the total number of classes; inferred from the labels if not given. 61 | Returns: 62 | A 1-D long tensor with shape `[num_queries]`. 63 | """ 64 | one_hot_top_k_labels = common_utils.one_hot(top_k_labels, 65 | num_classes) 66 | one_hot_top_k_labels = torch.sum(one_hot_top_k_labels, 67 | dim=1) 68 | majority_labels = torch.argmax(one_hot_top_k_labels, 1) 69 | 70 | return majority_labels 71 | -------------------------------------------------------------------------------- /our_requirements.txt: -------------------------------------------------------------------------------- 1 | # This file lists the complete Python environment used by the authors 2 | # while developing the code, in case someone comes across this repo and 3 | # wants to reproduce the results. It includes many packages that are 4 | # not used in the code base.
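# A minimal way to recreate this environment (an assumption, not an
# official instruction; the extra index is needed for the `+cu101`
# torch/torchvision builds):
#   pip install -r our_requirements.txt \
#       -f https://download.pytorch.org/whl/torch_stable.html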
5 | argon2-cffi==20.1.0 6 | ase==3.20.1 7 | async-generator==1.10 8 | attrs==20.2.0 9 | backcall==0.2.0 10 | bleach==3.2.1 11 | cached-property==1.5.2 12 | certifi==2020.6.20 13 | cffi==1.14.3 14 | chardet==3.0.4 15 | cycler==0.10.0 16 | Cython==0.29.21 17 | decorator==4.4.2 18 | defusedxml==0.6.0 19 | easydict==1.9 20 | entrypoints==0.3 21 | future==0.18.2 22 | h5py==3.0.0 23 | idna==2.10 24 | imageio==2.9.0 25 | importlib-metadata==2.0.0 26 | iniconfig==1.1.1 27 | ipykernel==5.3.4 28 | ipython==7.19.0 29 | ipython-genutils==0.2.0 30 | ipywidgets==7.5.1 31 | isodate==0.6.0 32 | jedi==0.17.2 33 | Jinja2==2.11.2 34 | joblib==0.17.0 35 | jsonschema==3.2.0 36 | jupyter==1.0.0 37 | jupyter-client==6.1.7 38 | jupyter-console==6.2.0 39 | jupyter-core==4.6.3 40 | jupyterlab-pygments==0.1.2 41 | kiwisolver==1.2.0 42 | llvmlite==0.34.0 43 | MarkupSafe==1.1.1 44 | matplotlib==3.3.2 45 | mistune==0.8.4 46 | nbclient==0.5.1 47 | nbconvert==6.0.7 48 | nbformat==5.0.8 49 | nest-asyncio==1.4.2 50 | networkx==2.5 51 | notebook==6.1.4 52 | numba==0.51.2 53 | numpy==1.19.3 54 | opencv-python==4.4.0.44 55 | packaging==20.4 56 | pandas==1.1.4 57 | pandocfilters==1.4.3 58 | parso==0.7.1 59 | pexpect==4.8.0 60 | pickleshare==0.7.5 61 | Pillow==8.0.1 62 | pluggy==0.13.1 63 | prometheus-client==0.8.0 64 | prompt-toolkit==3.0.8 65 | protobuf==3.13.0 66 | ptyprocess==0.6.0 67 | py==1.10.0 68 | pyamg==4.0.0 69 | pybind11==2.6.2 70 | pycocotools==2.0.2 71 | pycparser==2.20 72 | pydensecrf @ git+https://github.com/lucasb-eyer/pydensecrf.git@4d5343c398d75d7ebae34f51a47769084ba3a613 73 | Pygments==2.7.2 74 | pyparsing==2.4.7 75 | pyrsistent==0.17.3 76 | pytest==6.2.2 77 | python-dateutil==2.8.1 78 | pytz==2020.1 79 | PyWavelets==1.1.1 80 | PyYAML==3.12 81 | pyzmq==19.0.2 82 | qtconsole==4.7.7 83 | QtPy==1.9.0 84 | rdflib==5.0.0 85 | requests==2.24.0 86 | scikit-image==0.18.1 87 | scikit-learn==0.24.1 88 | scipy==1.6.1 89 | Send2Trash==1.5.0 90 | six==1.15.0 91 | tensorboardX==2.1 92 | terminado==0.9.1 93 | testpath==0.4.4 94 | threadpoolctl==2.1.0 95 | tifffile==2020.10.1 96 | toml==0.10.2 97 | torch==1.5.0+cu101 98 | torch-cluster==1.5.7 99 | torch-geometric==1.6.1 100 | torch-scatter==2.0.5 101 | torch-sparse==0.6.7 102 | torch-spline-conv==1.2.0 103 | torchvision==0.6.0+cu101 104 | tornado==6.1 105 | tqdm==4.51.0 106 | traitlets==5.0.5 107 | urllib3==1.25.11 108 | wcwidth==0.2.5 109 | webencodings==0.5.1 110 | widgetsnbextension==3.5.1 111 | zipp==3.4.0 112 | -------------------------------------------------------------------------------- /spml/utils/general/others.py: -------------------------------------------------------------------------------- 1 | """Utility functions. 2 | """ 3 | 4 | import numpy as np 5 | import torch 6 | 7 | import spml.data.transforms as transforms 8 | 9 | 10 | def create_image_pyramid(image_batch, label_batch, scales, is_flip=True): 11 | """Create pyramid of images and labels in different scales. 12 | 13 | This function generates image and label pyramid by upscaling 14 | and downscaling the input image and label. 15 | 16 | Args: 17 | image_batch: A dict with entry `image`, which is a 3-D numpy 18 | float tensor of shape `[height, width, channels]`. 19 | label_batch: A dict with entry `semantic_label` and `instance_label`, 20 | which are 2-D numpy long tensor of shape `[height, width]`. 21 | scales: A list of floats indicate the scale ratios. 22 | is_flip: enable/disable flip to augment image & label pyramids 23 | by horizontally flipping. 
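For example, `scales=[0.75, 1.0]` with `is_flip=True` yields four (image, label, info) tuples per input: each scale once as-is and once horizontally flipped.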
24 | 25 | Return: 26 | A list of tuples of (image, label, {'is_flip': True/False}). 27 | """ 28 | h, w = image_batch['image'].shape[-2:] 29 | flips = [True, False] if is_flip else [False] 30 | batches = [] 31 | for scale in scales: 32 | for flip in flips: 33 | img = image_batch['image'].transpose(1, 2, 0) 34 | sem_lab = label_batch['semantic_label'] 35 | inst_lab = label_batch['instance_label'] 36 | lab = np.stack([sem_lab, inst_lab], axis=2) 37 | img, lab = transforms.resize(img, lab, scale) 38 | if flip: 39 | img = img[:, ::-1, :] 40 | lab = lab[:, ::-1, :] 41 | img = img.transpose(2, 0, 1) 42 | img_batch = {'image': img} 43 | lab_batch = {'semantic_label': lab[..., 0], 44 | 'instance_label': lab[..., 1]} 45 | data_info = {'is_flip': flip} 46 | batches.append((img_batch, lab_batch, data_info)) 47 | return batches 48 | 49 | 50 | def prepare_datas_and_labels_mgpu(data_iterator, gpu_ids): 51 | """Prepare datas and labels for multi-gpu computation. 52 | 53 | Args: 54 | data_iterator: An Iterator instance of pytorch.DataLoader, which 55 | return a dictionary of `datas`, `labels`, and a scalar of `index`. 56 | gpu_ids: A list of scalars indicates the GPU device ids. 57 | 58 | Return: 59 | A list of tuples of `datas` and `labels`. 60 | """ 61 | input_batch, label_batch = [], [] 62 | for gpu_id in gpu_ids: 63 | data, label, index = data_iterator.next() 64 | for k, v in data.items(): 65 | data[k] = (v if not torch.is_tensor(v) 66 | else v.pin_memory().to(gpu_id, non_blocking=True)) 67 | for k, v in label.items(): 68 | label[k] = (v if not torch.is_tensor(v) 69 | else v.pin_memory().to(gpu_id, non_blocking=True)) 70 | input_batch.append(data) 71 | label_batch.append(label) 72 | 73 | return input_batch, label_batch 74 | -------------------------------------------------------------------------------- /spml/models/heads/spp.py: -------------------------------------------------------------------------------- 1 | """Build Spatial Pyramid Pooling Module.""" 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class ASPP(nn.Module): 9 | 10 | def __init__(self, in_channels, out_channels, 11 | bn=True, relu=True): 12 | """Build Atrous Spatial Pyramid Module for Deeplab. 13 | """ 14 | super(ASPP, self).__init__() 15 | 16 | def create_convs(dilation): 17 | convs = [] 18 | if dilation > 1: 19 | convs.append(nn.Conv2d(in_channels, out_channels, 20 | 3, 1, 21 | padding=dilation, 22 | dilation=dilation, 23 | bias=not bn)) 24 | else: 25 | convs.append(nn.Conv2d(in_channels, out_channels, 26 | 1, 1, 0, 1, bias=not bn)) 27 | if bn: 28 | convs.append(nn.BatchNorm2d(out_channels)) 29 | if relu: 30 | convs.append(nn.ReLU(inplace=True)) 31 | return nn.Sequential(*convs) 32 | 33 | self.aspp_1 = create_convs(6) 34 | self.aspp_2 = create_convs(12) 35 | self.aspp_3 = create_convs(18) 36 | self.aspp_4 = create_convs(24) 37 | 38 | def forward(self, x): 39 | xs = [self.aspp_1(x), self.aspp_2(x), 40 | self.aspp_3(x), self.aspp_4(x)] 41 | #output = torch.cat(xs, dim=1) 42 | output = sum(xs) 43 | return output 44 | 45 | 46 | class PSPP(nn.Module): 47 | 48 | def __init__(self, in_channels, out_channels, 49 | bn=True, relu=True): 50 | """Build Pooling Spatial Pyramid Module for PSPNet. 
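The input is average-pooled to 1x1, 2x2, 3x3 and 6x6 grids; each pooled map is projected by a 1x1 convolution and bilinearly upsampled back to the input size, and the concatenation of the input with the four pyramid features is fused by a final 3x3 convolution.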
51 | """ 52 | super(PSPP, self).__init__() 53 | 54 | def create_convs(in_c, out_c, k, size): 55 | convs = [] 56 | if size: 57 | convs.append(nn.AdaptiveAvgPool2d(size)) 58 | p = (k - 1) // 2 59 | convs.append(nn.Conv2d(in_c, out_c, k, 1, p, 1, bias=not bn)) 60 | if bn: 61 | convs.append(nn.BatchNorm2d(out_c)) 62 | if relu: 63 | convs.append(nn.ReLU(inplace=True)) 64 | return nn.Sequential(*convs) 65 | 66 | self.pspp_1 = create_convs(in_channels, out_channels, 1, 1) 67 | self.pspp_2 = create_convs(in_channels, out_channels, 1, 2) 68 | self.pspp_3 = create_convs(in_channels, out_channels, 1, 3) 69 | self.pspp_4 = create_convs(in_channels, out_channels, 1, 6) 70 | self.conv = create_convs( 71 | in_channels + out_channels * 4, out_channels, 3, None) 72 | 73 | def forward(self, x): 74 | size = x.shape[-2:] 75 | x1 = F.interpolate( 76 | self.pspp_1(x), size=size, mode='bilinear') 77 | x2 = F.interpolate( 78 | self.pspp_2(x), size=size, mode='bilinear') 79 | x3 = F.interpolate( 80 | self.pspp_3(x), size=size, mode='bilinear') 81 | x4 = F.interpolate( 82 | self.pspp_4(x), size=size, mode='bilinear') 83 | output = torch.cat([x, x1, x2, x3, x4], dim=1) 84 | output = self.conv(output) 85 | 86 | return output 87 | -------------------------------------------------------------------------------- /spml/utils/general/vis.py: -------------------------------------------------------------------------------- 1 | """Define utility functions for visualization. 2 | """ 3 | 4 | import os 5 | 6 | import torch 7 | import torchvision.utils 8 | import torch.nn.functional as F 9 | import scipy.io 10 | import numpy as np 11 | 12 | import spml.utils.general.common as common_utils 13 | 14 | 15 | def write_image_to_tensorboard(writer, images, size, curr_iter, name='image'): 16 | """Write list of image tensors to tensorboard. 17 | 18 | Args: 19 | writer: An instance of tensorboardX.SummaryWriter 20 | images: A list of 4-D tensors of shape 21 | `[batch_size, channel, height, width]`. 22 | """ 23 | for ind, image in enumerate(images): 24 | if image.shape[-2] != size[0] or image.shape[-1] != size[1]: 25 | image_type = image.dtype 26 | image = F.interpolate(image.float(), size=size, mode='nearest') 27 | images[ind] = image.type(image_type) 28 | 29 | images = torch.cat(images, dim=3) 30 | images = torchvision.utils.make_grid(images, nrow=1) 31 | writer.add_image(name, images, curr_iter) 32 | 33 | 34 | def write_scalars_to_tensorboard(writer, scalars, curr_iter): 35 | """Write dict of scalars to tensorboard. 36 | """ 37 | for key, value in scalars.items(): 38 | writer.add_scalar(key, value, curr_iter) 39 | 40 | 41 | def convert_label_to_color(label, color_map): 42 | """Convert integer label to RGB image. 43 | """ 44 | n, h, w = label.shape 45 | rgb = torch.index_select(color_map, 0, label.view(-1)).view(n, h, w, 3) 46 | rgb = rgb.permute(0, 3, 1, 2) 47 | 48 | return rgb 49 | 50 | 51 | def load_color_map(color_map_path): 52 | """Load color map. 53 | """ 54 | color_map = scipy.io.loadmat(color_map_path) 55 | color_map = color_map[ 56 | os.path.basename(color_map_path).strip('.mat')] 57 | color_map = torch.from_numpy((color_map * 255).astype(np.uint8)) 58 | 59 | return color_map 60 | 61 | 62 | def embedding_to_rgb(embeddings, project_type='pca'): 63 | """Project high-dimension embeddings to RGB colors. 64 | 65 | Args: 66 | embeddings: A 4-D float tensor with shape 67 | `[batch_size, embedding_dim, height, width]`. 68 | project_type: pca | random. 
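With 'pca', embeddings are projected onto their top-3 principal components; with 'random', 3 embedding channels are selected at random.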
69 | 70 | Returns: 71 | A 4-D float tensor with shape `[batch_size, 3, height, width]`. 72 | """ 73 | # Transform NCHW to NHWC. 74 | embeddings = embeddings.permute(0, 2, 3, 1).contiguous() 75 | embeddings = common_utils.normalize_embedding(embeddings) 76 | 77 | N, H, W, C = embeddings.shape 78 | if project_type == 'pca': 79 | rgb = common_utils.pca(embeddings, 3) 80 | elif project_type == 'random': 81 | random_inds = torch.randint(0, 82 | C, 83 | (3,), 84 | dtype=torch.long, 85 | device=embeddings.device) 86 | rgb = torch.index_select(embeddings, -1, random_inds) 87 | else: 88 | raise NotImplementedError() 89 | 90 | # Normalize per image. 91 | rgb = rgb.view(N, -1, 3) 92 | rgb -= torch.min(rgb, 1, keepdim=True)[0] 93 | rgb /= torch.max(rgb, 1, keepdim=True)[0] 94 | rgb *= 255 95 | rgb = rgb.byte() 96 | 97 | # Transform NHWC to NCHW. 98 | rgb = rgb.view(N, H, W, 3) 99 | rgb = rgb.permute(0, 3, 1, 2).contiguous() 100 | 101 | return rgb 102 | -------------------------------------------------------------------------------- /spml/config/default.py: -------------------------------------------------------------------------------- 1 | """Default configuration for SPML.""" 2 | 3 | import yaml 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | config.embedding_model = '' 9 | config.prediction_model = '' 10 | config.gpus = '' 11 | config.num_threads = 4 12 | 13 | ## Parameters for network. 14 | config.network = edict() 15 | # Backbone network. 16 | config.network.pixel_means = np.array((0.485, 0.456, 0.406)) 17 | config.network.pixel_stds = np.array((0.229, 0.224, 0.225)) 18 | config.network.pretrained = '' 19 | config.network.use_syncbn = False 20 | config.network.backbone_types = '' 21 | # Spatial Pooling Pyramid module. 22 | config.network.aspp_feature_dim = 512 23 | config.network.pspp_feature_dim = 512 24 | config.network.embedding_dim = 128 25 | config.network.label_divisor = 255 26 | # Spherical KMeans. 27 | config.network.kmeans_iterations = 10 28 | config.network.kmeans_num_clusters = [5, 5] 29 | 30 | ## Parameters for dataset. 31 | config.dataset = edict() 32 | config.dataset.data_dir = '' 33 | config.dataset.train_data_list = '' 34 | config.dataset.test_data_list = '' 35 | config.dataset.color_map_path = '' 36 | config.dataset.num_classes = 0 37 | config.dataset.semantic_ignore_index = 255 38 | 39 | ## Parameters for training. 40 | config.train = edict() 41 | # Data processing. 42 | config.train.lr_policy = 'step' 43 | config.train.random_mirror = True 44 | config.train.random_scale = True 45 | config.train.random_crop = True 46 | config.train.shuffle = True 47 | config.train.resume = False 48 | config.train.begin_iteration = 0 49 | config.train.max_iteration = 0 50 | config.train.warmup_iteration = 0 51 | config.train.decay_iterations = [0] 52 | config.train.snapshot_step = 0 53 | config.train.tensorboard_step = 0 54 | config.train.base_lr = 1e-3 55 | config.train.weight_decay = 5e-3 56 | config.train.momentum = 0.9 57 | config.train.batch_size = 0 58 | config.train.crop_size = [0, 0] 59 | config.train.memory_bank_size = 0 60 | # Losses.
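# Each loss family below (sem_ann, sem_occ, img_sim, feat_aff) is
# configured by three knobs: a type string ('none' disables the loss), a
# concentration (a temperature-like sharpness hyper-parameter of the
# contrastive term; an assumption based on the SegSort-style losses under
# spml/utils/segsort) and a scalar weight mixed into the total objective.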
61 | config.train.sem_ann_loss_types = 'none' 62 | config.train.sem_occ_loss_types = 'none' 63 | config.train.img_sim_loss_types = 'none' 64 | config.train.feat_aff_loss_types = 'none' 65 | config.train.sem_ann_concentration = 0 66 | config.train.sem_occ_concentration = 0 67 | config.train.img_sim_concentration = 0 68 | config.train.feat_aff_concentration = 0 69 | config.train.sem_ann_loss_weight = 0.0 70 | config.train.sem_occ_loss_weight = 0.0 71 | config.train.img_sim_loss_weight = 0.0 72 | config.train.feat_aff_loss_weight = 0.0 73 | 74 | ## Parameters for testing. 75 | config.test = edict() 76 | # Data Processing. 77 | config.test.scales = [0] 78 | config.test.image_size = 0 79 | config.test.crop_size = [0, 0] 80 | config.test.stride = [0, 0] 81 | 82 | 83 | def update_config(config_file): 84 | 85 | exp_config = None 86 | with open(config_file) as f: 87 | exp_config = edict(yaml.safe_load(f)) 88 | for k, v in exp_config.items(): 89 | # update default config. 90 | if k in config: 91 | if isinstance(v, dict): 92 | if k == 'train': 93 | if 'base_lr' in v: 94 | v['base_lr'] = float(v['base_lr']) 95 | if 'weight_decay' in v: 96 | v['weight_decay'] = float(v['weight_decay']) 97 | for vk, vv in v.items(): 98 | config[k][vk] = vv 99 | else: 100 | config[k] = v 101 | # insert new config. 102 | else: 103 | config[k] = v 104 | -------------------------------------------------------------------------------- /lib/nn/sync_batchnorm/replicate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : replicate.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import functools 12 | 13 | from torch.nn.parallel.data_parallel import DataParallel 14 | 15 | __all__ = [ 16 | 'CallbackContext', 17 | 'execute_replication_callbacks', 18 | 'DataParallelWithCallback', 19 | 'patch_replication_callback' 20 | ] 21 | 22 | 23 | class CallbackContext(object): 24 | pass 25 | 26 | 27 | def execute_replication_callbacks(modules): 28 | """ 29 | Execute a replication callback `__data_parallel_replicate__` on each module created by the original replication. 30 | 31 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 32 | 33 | Note that, as all modules are isomorphic, we assign each sub-module a context 34 | (shared among multiple copies of this module on different devices). 35 | Through this context, different copies can share some information. 36 | 37 | We guarantee that the callback on the master copy (the first copy) will be called ahead of the callback 38 | of any slave copies. 39 | """ 40 | master_copy = modules[0] 41 | nr_modules = len(list(master_copy.modules())) 42 | ctxs = [CallbackContext() for _ in range(nr_modules)] 43 | 44 | for i, module in enumerate(modules): 45 | for j, m in enumerate(module.modules()): 46 | if hasattr(m, '__data_parallel_replicate__'): 47 | m.__data_parallel_replicate__(ctxs[j], i) 48 | 49 | 50 | class DataParallelWithCallback(DataParallel): 51 | """ 52 | Data Parallel with a replication callback. 53 | 54 | A replication callback `__data_parallel_replicate__` of each module will be invoked after being created by 55 | the original `replicate` function.
56 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 57 | 58 | Examples: 59 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 60 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 61 | # sync_bn.__data_parallel_replicate__ will be invoked. 62 | """ 63 | 64 | def replicate(self, module, device_ids): 65 | modules = super(DataParallelWithCallback, self).replicate(module, device_ids) 66 | execute_replication_callbacks(modules) 67 | return modules 68 | 69 | 70 | def patch_replication_callback(data_parallel): 71 | """ 72 | Monkey-patch an existing `DataParallel` object. Add the replication callback. 73 | Useful when you have a customized `DataParallel` implementation. 74 | 75 | Examples: 76 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 77 | > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) 78 | > patch_replication_callback(sync_bn) 79 | # this is equivalent to 80 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 81 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 82 | """ 83 | 84 | assert isinstance(data_parallel, DataParallel) 85 | 86 | old_replicate = data_parallel.replicate 87 | 88 | @functools.wraps(old_replicate) 89 | def new_replicate(module, device_ids): 90 | modules = old_replicate(module, device_ids) 91 | execute_replication_callbacks(modules) 92 | return modules 93 | 94 | data_parallel.replicate = new_replicate 95 | -------------------------------------------------------------------------------- /spml/models/predictions/softmax_classifier.py: -------------------------------------------------------------------------------- 1 | """Define Softmax Classifier for semantic segmentation. 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import spml.models.utils as model_utils 9 | 10 | 11 | class SoftmaxClassifier(nn.Module): 12 | 13 | def __init__(self, config): 14 | super(SoftmaxClassifier, self).__init__() 15 | self.semantic_classifier = nn.Sequential( 16 | nn.Conv2d(config.network.embedding_dim, 17 | config.network.embedding_dim*2, 18 | #kernel_size=1, 19 | kernel_size=3, 20 | padding=1, 21 | stride=1, 22 | bias=False), 23 | nn.BatchNorm2d(config.network.embedding_dim*2), 24 | nn.ReLU(inplace=True), 25 | nn.Dropout(p=0.65), 26 | nn.Conv2d(config.network.embedding_dim*2, 27 | config.dataset.num_classes, 28 | kernel_size=1, 29 | stride=1, 30 | bias=True)) 31 | self.semantic_loss = nn.CrossEntropyLoss( 32 | ignore_index=config.dataset.semantic_ignore_index) 33 | self.ignore_index = config.dataset.semantic_ignore_index 34 | self.num_classes = config.dataset.num_classes 35 | 36 | 37 | def forward(self, datas, targets=None): 38 | """Predict semantic segmentation and loss. 39 | 40 | Args: 41 | datas: A dict with an entry `embedding`, which is a 4-D float 42 | tensor of shape `[batch_size, num_channels, height, width]`. 43 | targets: A dict with an entry `semantic_label`, which is a 3-D 44 | long tensor of shape `[batch_size, height, width]`. 45 | 46 | Return: 47 | A dict of tensors and scalars. 48 | """ 49 | targets = targets if targets is not None else {} 50 | 51 | # Predict semantic labels. 52 | semantic_embeddings = datas['embedding'] 53 | semantic_embeddings = ( 54 | semantic_embeddings / torch.norm(semantic_embeddings, dim=1, keepdim=True)) 55 | semantic_logits = self.semantic_classifier(semantic_embeddings) 56 | 57 | # Compute semantic loss.
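# Loss and accuracy are computed only when ground-truth labels are
# provided: the logits are first upsampled to the label resolution,
# labels >= num_classes are mapped to the ignore index so that
# cross-entropy skips them, and accuracy is averaged over the
# non-ignored pixels only.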
58 | semantic_loss, semantic_acc = None, None 59 | semantic_labels = targets.get('semantic_label', None) 60 | if semantic_labels is not None: 61 | # Upscale logits. 62 | semantic_logits = F.interpolate( 63 | semantic_logits, 64 | size=semantic_labels.shape[-2:], 65 | mode='bilinear') 66 | semantic_pred = torch.argmax(semantic_logits, dim=1) 67 | # Rescale labels to the same size as logits. 68 | #n, h, w = semantic_labels.shape 69 | #semantic_labels = F.interpolate( 70 | # semantic_labels.view(n, 1, h, w).float(), 71 | # size=semantic_embeddings.shape[-2:], 72 | # mode='nearest') 73 | semantic_labels = semantic_labels.masked_fill( 74 | semantic_labels >= self.num_classes, self.ignore_index) 75 | semantic_labels = semantic_labels.squeeze_(1).long() 76 | 77 | semantic_loss = self.semantic_loss(semantic_logits, semantic_labels) 78 | semantic_acc = torch.eq(semantic_pred, semantic_labels) 79 | valid_pixels = torch.ne(semantic_labels, 80 | self.ignore_index) 81 | semantic_acc = torch.masked_select(semantic_acc, valid_pixels).float().mean() 82 | else: 83 | semantic_pred = torch.argmax(semantic_logits, dim=1) 84 | 85 | outputs = {'semantic_prediction': semantic_pred, 86 | 'semantic_logit': semantic_logits, 87 | 'sem_ann_loss': semantic_loss, 88 | 'accuracy': semantic_acc,} 89 | 90 | return outputs 91 | 92 | def get_params_lr(self): 93 | """Helper function to adjust learning rate for each sub modules. 94 | """ 95 | # Specify learning rate for each sub modules. 96 | ret = [] 97 | ret.append({ 98 | 'params': [n for n in model_utils.get_params( 99 | self, 100 | ['semantic_classifier'], 101 | ['weight'])], 102 | 'lr': 10}) 103 | ret.append({ 104 | 'params': [n for n in model_utils.get_params( 105 | self, 106 | ['semantic_classifier'], 107 | ['bias'])], 108 | 'lr': 20, 109 | 'weight_decay': 0}) 110 | 111 | return ret 112 | 113 | 114 | def softmax_classifier(config): 115 | """Pixel semantic segmentation model. 116 | """ 117 | return SoftmaxClassifier(config) 118 | -------------------------------------------------------------------------------- /lib/nn/parallel/data_parallel.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------- 2 | # Unified Panoptic Segmentation Network 3 | # 4 | # Copyright (c) 2018-2019 Uber Technologies, Inc. 5 | # 6 | # Licensed under the Uber Non-Commercial License (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at the root directory of this project. 9 | # 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | # --------------------------------------------------------------------------- 13 | import operator 14 | import warnings 15 | from itertools import chain 16 | 17 | import torch 18 | from torch.nn.parallel.data_parallel import DataParallel as DP 19 | from torch.nn.modules import Module 20 | from torch.nn.parallel.scatter_gather import scatter_kwargs, gather 21 | from torch.nn.parallel.replicate import replicate 22 | from torch.nn.parallel.parallel_apply import parallel_apply 23 | 24 | 25 | def _check_balance(device_ids): 26 | 27 | imbalance_warn = """ 28 | There is an imbalance between your GPUs. You may want to exclude GPU {} which 29 | has less than 75% of the memory or cores of GPU {}. 
You can do so by setting 30 | the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES 31 | environment variable.""" 32 | 33 | dev_props = [torch.cuda.get_device_properties(i) for i in device_ids] 34 | 35 | def warn_imbalance(get_prop): 36 | 37 | values = [get_prop(props) for props in dev_props] 38 | min_pos, min_val = min(enumerate(values), key=operator.itemgetter(1)) 39 | max_pos, max_val = max(enumerate(values), key=operator.itemgetter(1)) 40 | if min_val / max_val < 0.75: 41 | warnings.warn(imbalance_warn.format(device_ids[min_pos], 42 | device_ids[max_pos])) 43 | return True 44 | 45 | return False 46 | 47 | if warn_imbalance(lambda props: props.total_memory): 48 | return 49 | if warn_imbalance(lambda props: props.multi_processor_count): 50 | return 51 | 52 | 53 | class DataParallel(DP): 54 | r"""Reimplementation of torch.nn.DataParallel, and allows not 55 | gathering outputs at each gpu. 56 | """ 57 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 58 | def __init__(self, module, device_ids=None, 59 | output_device=None, dim=0, gather_output=True): 60 | 61 | super(DataParallel, self).__init__(module, device_ids, output_device, dim) 62 | 63 | if not torch.cuda.is_available(): 64 | self.module = module 65 | self.device_ids = [] 66 | return 67 | 68 | if device_ids is None: 69 | device_ids = list(range(torch.cuda.device_count())) 70 | if output_device is None: 71 | output_device = device_ids[0] 72 | 73 | self.dim = dim 74 | self.module = module 75 | self.device_ids = device_ids 76 | self.output_device = output_device 77 | self.src_device_obj = torch.device("cuda:{}".format(self.device_ids[0])) 78 | self.gather_output = gather_output 79 | 80 | _check_balance(self.device_ids) 81 | 82 | if len(self.device_ids) == 1: 83 | self.module.cuda(device_ids[0]) 84 | 85 | def forward(self, *inputs, **kwargs): 86 | 87 | # If no device ids specified, fall back to single gpu. 88 | if not self.device_ids: 89 | return self.module(*inputs, **kwargs) 90 | 91 | for t in chain(self.module.parameters(), self.module.buffers()): 92 | if t.device != self.src_device_obj: 93 | raise RuntimeError( 94 | "module must have its parameters and buffers " 95 | "on device {} (device_ids[0]) but found one of " 96 | "them on device: {}".format(self.src_device_obj, t.device)) 97 | 98 | # inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) 99 | assert kwargs == {}, 'not implemented' 100 | kwargs = [{} for _ in range(len(inputs))] 101 | 102 | #if len(self.device_ids) == 1: 103 | # return self.module(*inputs[0], **kwargs[0]) 104 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 105 | outputs = self.parallel_apply(replicas, inputs, kwargs) 106 | if self.gather_output: 107 | return self.gather(outputs, self.output_device) 108 | else: 109 | return outputs 110 | -------------------------------------------------------------------------------- /spml/models/embeddings/local_model.py: -------------------------------------------------------------------------------- 1 | """Create model for producing pixel location and smoothed RGB features.""" 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.nn.parameter import Parameter 7 | import numpy as np 8 | 9 | import spml.utils.general.common as common_utils 10 | import spml.utils.segsort.common as segsort_common 11 | 12 | 13 | class GaussianConv2d(nn.Module): 14 | 15 | def __init__(self, in_channels, out_channels, ksize=5): 16 | """Applies 2-D Gaussian Blur. 
17 | 18 | Args: 19 | in_channels: An integer indicating the input channel dimension. 20 | out_channels: An integer indicating the output channel dimension. 21 | ksize: An integer indicating the Gaussian kernel size. 22 | """ 23 | 24 | super(GaussianConv2d, self).__init__() 25 | weight = (np.arange(ksize, dtype=np.float32) - ksize // 2) ** 2 26 | weight = np.sqrt(weight[None, :] + weight[:, None]) 27 | weight = np.reshape(weight, (1, 1, ksize, ksize)) / weight.sum() 28 | self.weight = Parameter( 29 | torch.Tensor(weight).expand(out_channels, -1, -1, -1)) 30 | self._in_channels = in_channels 31 | self._out_channels = out_channels 32 | 33 | def forward(self, x): 34 | with torch.no_grad(): 35 | return F.conv2d(x, self.weight, groups=self._in_channels) 36 | 37 | 38 | class LocationColorNetwork(nn.Module): 39 | 40 | def __init__(self, use_color=True, use_location=True, 41 | norm_color=True, smooth_ksize=None): 42 | """Generates location coordinates and blurred RGB colors. 43 | 44 | Args: 45 | use_color: enable/disable use_color to output RGB colors. 46 | use_location: enable/disable use_location to output location 47 | coordinates. 48 | norm_color: enable/disable norm_color to normalize RGB colors. 49 | If True, scale the maximum and minimum value to 1 and -1. 50 | smooth_ksize: kernel size used to smooth the RGB colors; 51 | if None, no smoothing is applied. 52 | """ 53 | 54 | super(LocationColorNetwork, self).__init__() 55 | self._use_color = use_color 56 | self._use_location = use_location 57 | self._norm_color = norm_color 58 | self._smooth_ksize = smooth_ksize 59 | if smooth_ksize: 60 | self.smooth_kernel = GaussianConv2d(3, 3, smooth_ksize) 61 | else: 62 | self.smooth_kernel = nn.Identity() 63 | 64 | def __repr__(self): 65 | return ('LocationColorNetwork(use_color={}, use_location={}'+ 66 | ', smooth_ksize={})').format( 67 | self._use_color, self._use_location, self._smooth_ksize) 68 | 69 | def forward(self, x, size=None): 70 | """Generate location coordinates and color features. 71 | 72 | Args: 73 | x: A 4-D tensor of shape `[batch_size, channels, height, width]`. 74 | size: A tuple of integers indicating the output resolution. 75 | 76 | Returns: 77 | An N-D tensor of shape `[batch_size, out_height, out_width, channels]`. 78 | For the output channels, the first 2 are the locations and the rest 79 | are the RGB colors. 80 | """ 81 | N, C, H, W = x.shape 82 | if size: 83 | H, W = size 84 | 85 | features = [] 86 | 87 | # Generate location features. 88 | if self._use_location: 89 | locations = segsort_common.generate_location_features( 90 | (H, W), x.device, 'float') 91 | locations -= 0.5 92 | locations = locations.unsqueeze(0).expand(N, H, W, 2) 93 | features.append(locations) 94 | 95 | # Generate color features. 96 | if self._use_color: 97 | x = self.smooth_kernel(x) 98 | 99 | if size: 100 | x = F.interpolate(x, size=size, mode='bilinear') 101 | 102 | colors = x.permute(0, 2, 3, 1).contiguous() 103 | # Normalize color per data.
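# Per-image normalization: subtract each image's channel means, then
# divide by the per-image maximum absolute value, so the colors roughly
# span [-1, 1] regardless of the input's dynamic range.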
104 | if self._norm_color: 105 | mean_colors = torch.mean(colors.view(N, -1, C), 106 | dim=1, keepdim=True) 107 | mean_colors = mean_colors.view(N, 1, 1, C) 108 | colors = colors - mean_colors 109 | 110 | max_colors, _ = torch.max( 111 | torch.abs(colors.view(N, -1, C)), 112 | dim=1, keepdim=True) 113 | max_colors = max_colors.view(N, 1, 1, C) 114 | colors = colors / max_colors 115 | 116 | features.append(colors) 117 | 118 | features = torch.cat(features, dim=-1) 119 | return features 120 | -------------------------------------------------------------------------------- /pyscripts/benchmark/benchmark_by_mIoU.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | from PIL import Image 5 | import numpy as np 6 | 7 | 8 | def parse_args(): 9 | 10 | parser = argparse.ArgumentParser( 11 | description='Benchmark segmentation predictions' 12 | ) 13 | parser.add_argument('--pred_dir', type=str, default='', 14 | help='/path/to/prediction.') 15 | parser.add_argument('--gt_dir', type=str, default='', 16 | help='/path/to/ground-truths') 17 | parser.add_argument('--num_classes', type=int, default=21, 18 | help='number of segmentation classes') 19 | parser.add_argument('--string_replace', type=str, default=',', 20 | help='replace the first string with the second one') 21 | 22 | return parser.parse_args() 23 | 24 | 25 | def iou_stats(pred, target, num_classes=21, background=0): 26 | """Computes statistics of true positive (TP), false negative (FN) and 27 | false positive (FP). 28 | 29 | Args: 30 | pred: A numpy array. 31 | target: A numpy array which should be in the same size as pred. 32 | num_classes: A number indicating the number of valid classes. 33 | background: A number indicating the class index of the background. 34 | 35 | Returns: 36 | Three num_classes-dim vectors indicating the statistics of TP+FN, TP+FP 37 | and TP for each class. 38 | """ 39 | # Set redundant classes to background.
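# `locs` keeps only pixels whose target is a valid class id; ignore
# labels (e.g. 255) fall outside [0, num_classes) and are dropped.
# Illustration on hypothetical arrays: pred=[0, 1, 1], target=[0, 1, 2]
# with num_classes=3 gives tp_fn=[1, 1, 1], tp_fp=[1, 2, 0],
# tp=[1, 1, 0], hence per-class IoU = tp / (tp_fn + tp_fp - tp)
# = [1.0, 0.5, 0.0].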
40 | locs = np.logical_and(target > -1, target < num_classes) 41 | 42 | # true positive + false negative 43 | tp_fn, _ = np.histogram(target[locs], 44 | bins=np.arange(num_classes+1)) 45 | # true positive + false positive 46 | tp_fp, _ = np.histogram(pred[locs], 47 | bins=np.arange(num_classes+1)) 48 | # true positive 49 | tp_locs = np.logical_and(locs, pred == target) 50 | tp, _ = np.histogram(target[tp_locs], 51 | bins=np.arange(num_classes+1)) 52 | 53 | return tp_fn, tp_fp, tp 54 | 55 | 56 | def main(): 57 | 58 | args = parse_args() 59 | 60 | assert(os.path.isdir(args.pred_dir)) 61 | assert(os.path.isdir(args.gt_dir)) 62 | print(args.pred_dir) 63 | tp_fn = np.zeros(args.num_classes, dtype=np.float64) 64 | tp_fp = np.zeros(args.num_classes, dtype=np.float64) 65 | tp = np.zeros(args.num_classes, dtype=np.float64) 66 | for dirpath, dirnames, filenames in os.walk(args.pred_dir): 67 | for filename in filenames: 68 | predname = os.path.join(dirpath, filename) 69 | gtname = predname.replace(args.pred_dir, args.gt_dir) 70 | if args.string_replace != '': 71 | stra, strb = args.string_replace.split(',') 72 | gtname = gtname.replace(stra, strb) 73 | 74 | pred = np.asarray( 75 | Image.open(predname).convert(mode='L'), 76 | dtype=np.uint8) 77 | gt = np.asarray( 78 | Image.open(gtname).convert(mode='L'), 79 | dtype=np.uint8) 80 | _tp_fn, _tp_fp, _tp = iou_stats( 81 | pred, 82 | gt, 83 | num_classes=args.num_classes, 84 | background=0) 85 | 86 | tp_fn += _tp_fn 87 | tp_fp += _tp_fp 88 | tp += _tp 89 | 90 | iou = tp / (tp_fn + tp_fp - tp + 1e-12) * 100.0 91 | 92 | if args.num_classes == 15: 93 | # MSCOCO-Densepose 94 | class_names = ['Background', 'Torso', 'R. Hand', 'L. Hand', 95 | 'L. Foot', 'R. Foot', 'R. Thigh', 'L. Thigh', 96 | 'R. Leg', 'L. Leg', 'L. Arm', 'R. Arm', 97 | 'L. Forearm', 'R. Forearm', 'Head'] 98 | elif args.num_classes == 21: 99 | # VOC12 100 | class_names = ['Background', 'Aero', 'Bike', 'Bird', 'Boat', 101 | 'Bottle', 'Bus', 'Car', 'Cat', 'Chair', 'Cow', 102 | 'Table', 'Dog', 'Horse', 'MBike', 'Person', 103 | 'Plant', 'Sheep', 'Sofa', 'Train', 'TV'] 104 | else: 105 | raise NotImplementedError() 106 | 107 | 108 | for i in range(args.num_classes): 109 | if i >= len(class_names): 110 | break 111 | print('class {:10s}: {:02d}, acc: {:4.4f}%'.format( 112 | class_names[i], i, iou[i])) 113 | mean_iou = iou.sum() / args.num_classes 114 | print('mean IOU: {:4.4f}%'.format(mean_iou)) 115 | 116 | mean_pixel_acc = tp.sum() / (tp_fp.sum() + 1e-12) * 100.0 117 | print('mean Pixel Acc: {:4.4f}%'.format(mean_pixel_acc)) 118 | 119 | if __name__ == '__main__': 120 | main() 121 | -------------------------------------------------------------------------------- /lib/nn/sync_batchnorm/comm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : comm.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import queue 12 | import collections 13 | import threading 14 | 15 | __all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] 16 | 17 | 18 | class FutureResult(object): 19 | """A thread-safe future implementation.
Used only as one-to-one pipe.""" 20 | 21 | def __init__(self): 22 | self._result = None 23 | self._lock = threading.Lock() 24 | self._cond = threading.Condition(self._lock) 25 | 26 | def put(self, result): 27 | with self._lock: 28 | assert self._result is None, 'Previous result hasn\'t been fetched.' 29 | self._result = result 30 | self._cond.notify() 31 | 32 | def get(self): 33 | with self._lock: 34 | if self._result is None: 35 | self._cond.wait() 36 | 37 | res = self._result 38 | self._result = None 39 | return res 40 | 41 | 42 | _MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) 43 | _SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result']) 44 | 45 | 46 | class SlavePipe(_SlavePipeBase): 47 | """Pipe for master-slave communication.""" 48 | 49 | def run_slave(self, msg): 50 | self.queue.put((self.identifier, msg)) 51 | ret = self.result.get() 52 | self.queue.put(True) 53 | return ret 54 | 55 | 56 | class SyncMaster(object): 57 | """An abstract `SyncMaster` object. 58 | 59 | - During the replication, as data parallel will trigger a callback on each module, all slave devices should 60 | call `register_slave(id)` and obtain a `SlavePipe` to communicate with the master. 61 | - During the forward pass, the master device invokes `run_master`; all messages from slave devices are collected 62 | and passed to a registered callback. 63 | - After receiving the messages, the master device gathers the information and determines the message to be passed 64 | back to each slave device. 65 | """ 66 | 67 | def __init__(self, master_callback): 68 | """ 69 | 70 | Args: 71 | master_callback: a callback to be invoked after having collected messages from slave devices. 72 | """ 73 | self._master_callback = master_callback 74 | self._queue = queue.Queue() 75 | self._registry = collections.OrderedDict() 76 | self._activated = False 77 | 78 | def register_slave(self, identifier): 79 | """ 80 | Register a slave device. 81 | 82 | Args: 83 | identifier: an identifier, usually the device id. 84 | 85 | Returns: a `SlavePipe` object which can be used to communicate with the master device. 86 | 87 | """ 88 | if self._activated: 89 | assert self._queue.empty(), 'Queue is not clean before next initialization.' 90 | self._activated = False 91 | self._registry.clear() 92 | future = FutureResult() 93 | self._registry[identifier] = _MasterRegistry(future) 94 | return SlavePipe(identifier, self._queue, future) 95 | 96 | def run_master(self, master_msg): 97 | """ 98 | Main entry for the master device in each forward pass. 99 | The messages are first collected from each device (including the master device), and then 100 | a callback is invoked to compute the message to be sent back to each device 101 | (including the master device). 102 | 103 | Args: 104 | master_msg: the message that the master wants to send to itself. This will be placed as the first 105 | message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. 106 | 107 | Returns: the message to be sent back to the master device. 108 | 109 | """ 110 | self._activated = True 111 | 112 | intermediates = [(0, master_msg)] 113 | for i in range(self.nr_slaves): 114 | intermediates.append(self._queue.get()) 115 | 116 | results = self._master_callback(intermediates) 117 | assert results[0][0] == 0, 'The first result should belong to the master.'
118 | 119 | for i, res in results: 120 | if i == 0: 121 | continue 122 | self._registry[i].result.put(res) 123 | 124 | for i in range(self.nr_slaves): 125 | assert self._queue.get() is True 126 | 127 | return results[0][1] 128 | 129 | @property 130 | def nr_slaves(self): 131 | return len(self._registry) 132 | -------------------------------------------------------------------------------- /spml/utils/general/common.py: -------------------------------------------------------------------------------- 1 | """Utility functions for all. 2 | """ 3 | # This code is borrowed and re-implemented from: 4 | # https://github.com/jyhjinghwang/SegSort/blob/master/network/segsort/vis_utils.py 5 | # https://github.com/jyhjinghwang/SegSort/blob/master/network/segsort/common_utils.py 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | 11 | def resize_labels(labels, size): 12 | """Helper function to resize labels. 13 | 14 | Args: 15 | labels: A long tensor of shape `[batch_size, height, width]`. 16 | size: A tuple of integers indicating the output resolution. 17 | Returns: 18 | A long tensor of shape `[batch_size, new_height, new_width]`. 19 | """ 20 | n, h, w = labels.shape 21 | labels = F.interpolate(labels.view(n, 1, h, w).float(), 22 | size=size, 23 | mode='nearest') 24 | labels = labels.squeeze_(1).long() 25 | 26 | return labels 27 | 28 | 29 | def calculate_principal_components(embeddings, num_components=3): 30 | """Calculates the principal components given the embedding features. 31 | 32 | Args: 33 | embeddings: A 2-D float tensor of shape `[num_pixels, embedding_dims]`. 34 | num_components: An integer indicating the number of principal 35 | components to return. 36 | 37 | Returns: 38 | A 2-D float tensor of shape `[num_pixels, num_components]`. 39 | """ 40 | embeddings = embeddings - torch.mean(embeddings, 0, keepdim=True) 41 | _, _, v = torch.svd(embeddings) 42 | return v[:, :num_components] 43 | 44 | 45 | def pca(embeddings, num_components=3, principal_components=None): 46 | """Conducts principal component analysis on the embedding features. 47 | 48 | This function is used to reduce the dimensionality of the embedding. 49 | 50 | Args: 51 | embeddings: An N-D float tensor whose last dimension 52 | is `embedding_dim`. 53 | num_components: The number of principal components. 54 | principal_components: A 2-D float tensor used to convert the 55 | embedding features to PCA'ed space, also known as the U matrix 56 | from SVD. If not given, this function will calculate the 57 | principal_components given inputs. 58 | 59 | Returns: 60 | An N-D float tensor with the last dimension as `num_components`. 61 | """ 62 | shape = embeddings.shape 63 | embeddings = embeddings.view(-1, shape[-1]) 64 | 65 | if principal_components is None: 66 | principal_components = calculate_principal_components( 67 | embeddings, num_components) 68 | embeddings = torch.mm(embeddings, principal_components) 69 | 70 | new_shape = list(shape[:-1]) + [num_components] 71 | embeddings = embeddings.view(new_shape) 72 | 73 | return embeddings 74 | 75 | 76 | def one_hot(labels, max_label=None): 77 | """Transform long labels into one-hot format. 78 | 79 | Args: 80 | labels: An N-D long tensor. 81 | max_label: An integer indicating the number of classes; inferred as labels.max() + 1 if not given. 82 | Returns: 83 | An (N+1)-D long tensor.
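For example, labels `[[0, 2]]` with `max_label=3` yield `[[[1, 0, 0], [0, 0, 1]]]`.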
84 | """ 85 | if max_label is None: 86 | max_label = labels.max() + 1 87 | 88 | shape = labels.shape 89 | labels = labels.view(-1, 1) 90 | one_hot_labels = torch.zeros((labels.shape[0], max_label), 91 | dtype=torch.long, 92 | device=labels.device) 93 | one_hot_labels = one_hot_labels.scatter_(1, labels, 1) 94 | 95 | new_shape = list(shape) + [max_label] 96 | one_hot_labels = one_hot_labels.view(new_shape) 97 | 98 | return one_hot_labels 99 | 100 | 101 | def normalize_embedding(embeddings, eps=1e-12): 102 | """Normalizes embedding by L2 norm. 103 | 104 | This function is used to normalize embedding so that the 105 | embedding features lie on a unit hypersphere. 106 | 107 | Args: 108 | embeddings: An N-D float tensor with feature embedding in 109 | the last dimension. 110 | 111 | Returns: 112 | An N-D float tensor with the same shape as input embedding 113 | with feature embedding normalized by L2 norm in the last 114 | dimension. 115 | """ 116 | norm = torch.norm(embeddings, dim=-1, keepdim=True) 117 | norm = torch.where(torch.ge(norm, eps), 118 | norm, 119 | torch.ones_like(norm).mul_(eps)) 120 | return embeddings / norm 121 | 122 | 123 | def segment_mean(x, index): 124 | """Function as tf.segment_mean. 125 | """ 126 | x = x.view(-1, x.shape[-1]) 127 | index = index.view(-1) 128 | 129 | max_index = index.max() + 1 130 | sum_x = torch.zeros((max_index, x.shape[-1]), 131 | dtype=torch.float, 132 | device=x.device) 133 | num_index = torch.zeros((max_index,), 134 | dtype=torch.float, 135 | device=x.device) 136 | 137 | num_index = num_index.scatter_add_( 138 | 0, index, torch.ones_like(index, dtype=torch.float)) 139 | num_index = torch.where(torch.eq(num_index, 0), 140 | torch.ones_like(num_index), 141 | num_index) 142 | 143 | index_2d = index.view(-1, 1).expand(-1, x.shape[-1]) 144 | sum_x = sum_x.scatter_add_(0, index_2d, x) 145 | mean_x = sum_x.div_(num_index.view(-1, 1)) 146 | 147 | return mean_x 148 | -------------------------------------------------------------------------------- /lib/nn/sync_batchnorm/tests/test_sync_batchnorm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : test_sync_batchnorm.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 
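# The DataParallel-based tests below hard-code device_ids=[0, 1], so at
# least two visible CUDA devices are required. One way to run them:
#   python -m unittest lib.nn.sync_batchnorm.tests.test_sync_batchnorm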
8 | 9 | import unittest 10 | 11 | import torch 12 | import torch.nn as nn 13 | from torch.autograd import Variable 14 | 15 | from lib.nn.sync_batchnorm.batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, convert_model 16 | from lib.nn.sync_batchnorm.replicate import patch_replication_callback 17 | from lib.nn.sync_batchnorm.replicate import DataParallelWithCallback 18 | from lib.nn.sync_batchnorm.unit_test import TorchTestCase 19 | from lib.nn.parallel.data_parallel import DataParallel 20 | 21 | 22 | def handy_var(a, unbias=True): 23 | n = a.size(0) 24 | asum = a.sum(dim=0) 25 | as_sum = (a ** 2).sum(dim=0) # a square sum 26 | sumvar = as_sum - asum * asum / n 27 | if unbias: 28 | return sumvar / (n - 1) 29 | else: 30 | return sumvar / n 31 | 32 | 33 | def _find_bn(module): 34 | for m in module.modules(): 35 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, SynchronizedBatchNorm1d, SynchronizedBatchNorm2d)): 36 | return m 37 | 38 | 39 | class SyncTestCase(TorchTestCase): 40 | def _syncParameters(self, bn1, bn2): 41 | bn1.reset_parameters() 42 | bn2.reset_parameters() 43 | if bn1.affine and bn2.affine: 44 | bn2.weight.data.copy_(bn1.weight.data) 45 | bn2.bias.data.copy_(bn1.bias.data) 46 | 47 | def _checkBatchNormResult(self, bn1, bn2, input, is_train, cuda=False): 48 | """Check the forward and backward for the customized batch normalization.""" 49 | bn1.train(mode=is_train) 50 | bn2.train(mode=is_train) 51 | 52 | if cuda: 53 | input = input.cuda() 54 | 55 | self._syncParameters(_find_bn(bn1), _find_bn(bn2)) 56 | 57 | input1 = Variable(input, requires_grad=True) 58 | output1 = bn1(input1) 59 | output1.sum().backward() 60 | input2 = Variable(input, requires_grad=True) 61 | output2 = bn2(input2) 62 | output2.sum().backward() 63 | 64 | self.assertTensorClose(input1.data, input2.data) 65 | self.assertTensorClose(output1.data, output2.data) 66 | self.assertTensorClose(input1.grad, input2.grad) 67 | self.assertTensorClose(_find_bn(bn1).running_mean, _find_bn(bn2).running_mean) 68 | self.assertTensorClose(_find_bn(bn1).running_var, _find_bn(bn2).running_var) 69 | 70 | def testSyncBatchNormNormalTrain(self): 71 | bn = nn.BatchNorm1d(10) 72 | sync_bn = SynchronizedBatchNorm1d(10, momentum=0.1) 73 | 74 | self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10), True) 75 | 76 | def testSyncBatchNormNormalEval(self): 77 | bn = nn.BatchNorm1d(10) 78 | sync_bn = SynchronizedBatchNorm1d(10, momentum=0.1) 79 | 80 | self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10), False) 81 | 82 | def testSyncBatchNormSyncTrain(self): 83 | bn = nn.BatchNorm1d(10, eps=1e-5, affine=False) 84 | sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False, momentum=0.1) 85 | sync_bn = DataParallel(sync_bn, device_ids=[0,1]) 86 | patch_replication_callback(sync_bn) 87 | #sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 88 | 89 | bn.cuda() 90 | sync_bn.cuda() 91 | 92 | self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10), True, cuda=True) 93 | 94 | def testSyncBatchNormSyncEval(self): 95 | bn = nn.BatchNorm1d(10, eps=1e-5, affine=False) 96 | sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False, momentum=0.1) 97 | sync_bn = DataParallel(sync_bn, device_ids=[0,1]) 98 | patch_replication_callback(sync_bn) 99 | #sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 100 | 101 | bn.cuda() 102 | sync_bn.cuda() 103 | 104 | self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10), False, cuda=True) 105 | 106 | def testSyncBatchNorm2DSyncTrain(self): 107 | bn = 
nn.BatchNorm2d(10) 108 | sync_bn = SynchronizedBatchNorm2d(10, momentum=0.1) 109 | sync_bn = DataParallel(sync_bn, device_ids=[0,1]) 110 | patch_replication_callback(sync_bn) 111 | #sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 112 | 113 | bn.cuda() 114 | sync_bn.cuda() 115 | 116 | self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10, 16, 16), True, cuda=True) 117 | 118 | def testConvertSyncBatchNorm2DSyncTrain(self): 119 | bn = nn.BatchNorm2d(10) 120 | sync_bn = convert_model(nn.BatchNorm2d(10)) 121 | sync_bn = DataParallel(sync_bn, device_ids=[0,1]) 122 | patch_replication_callback(sync_bn) 123 | #sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 124 | 125 | bn.cuda() 126 | sync_bn.cuda() 127 | 128 | self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10, 16, 16), True, cuda=True) 129 | 130 | 131 | 132 | if __name__ == '__main__': 133 | unittest.main() 134 | -------------------------------------------------------------------------------- /pyscripts/benchmark/benchmark_by_instance.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | from PIL import Image 5 | import numpy as np 6 | 7 | 8 | def parse_args(): 9 | 10 | parser = argparse.ArgumentParser( 11 | description='Benchmark segmentation predictions' 12 | ) 13 | parser.add_argument('--pred_dir', type=str, default='', 14 | help='/path/to/prediction.') 15 | parser.add_argument('--gt_dir', type=str, default='', 16 | help='/path/to/ground-truths') 17 | parser.add_argument('--inst_dir', type=str, default='', 18 | help='/path/to/instance-mask') 19 | parser.add_argument('--num_classes', type=int, default=21, 20 | help='number of segmentation classes') 21 | parser.add_argument('--string_replace', type=str, default=',', 22 | help='replace the first string with the second one') 23 | 24 | return parser.parse_args() 25 | 26 | 27 | def iou_stats(pred, target, num_classes=21, background=0): 28 | """Computes statistics of true positive (TP), false negative (FN) and 29 | false positive (FP). 30 | 31 | Args: 32 | pred: A numpy array. 33 | target: A numpy array which should be in the same size as pred. 34 | num_classes: A number indicating the number of valid classes. 35 | background: A number indicating the class index of the background. 36 | 37 | Returns: 38 | Three num_classes-dim vectors indicating the statistics of TP+FN, TP+FP 39 | and TP for each class. 40 | """ 41 | # Set redundant classes to background.
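# Same masking as in benchmark_by_mIoU.iou_stats: keep only pixels whose
# target lies in [0, num_classes). main() below then weights each image's
# per-class IoU by the number of instances of that class, yielding an
# instance-weighted mean IoU.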
42 |   locs = np.logical_and(target > -1, target < num_classes)
43 | 
44 |   # true positive + false negative
45 |   tp_fn, _ = np.histogram(target[locs],
46 |                           bins=np.arange(num_classes+1))
47 |   # true positive + false positive
48 |   tp_fp, _ = np.histogram(pred[locs],
49 |                           bins=np.arange(num_classes+1))
50 |   # true positive
51 |   tp_locs = np.logical_and(locs, pred == target)
52 |   tp, _ = np.histogram(target[tp_locs],
53 |                        bins=np.arange(num_classes+1))
54 | 
55 |   return tp_fn, tp_fp, tp
56 | 
57 | 
58 | def main():
59 | 
60 |   args = parse_args()
61 | 
62 | 
63 |   assert(os.path.isdir(args.pred_dir))
64 |   assert(os.path.isdir(args.gt_dir))
65 |   print(args.pred_dir)
66 |   iou = np.zeros(args.num_classes, dtype=np.float64)
67 |   ninst = np.zeros(args.num_classes, dtype=np.float64)
68 |   for dirpath, dirnames, filenames in os.walk(args.pred_dir):
69 |     for filename in filenames:
70 |       predname = os.path.join(dirpath, filename)
71 |       gtname = predname.replace(args.pred_dir, args.gt_dir)
72 |       instname = predname.replace(args.pred_dir, args.inst_dir)
73 |       if args.string_replace != '':
74 |         stra, strb = args.string_replace.split(',')
75 |         gtname = gtname.replace(stra, strb)
76 |         instname = instname.replace(stra, strb)
77 | 
78 |       pred = np.asarray(
79 |           Image.open(predname).convert(mode='L'),
80 |           dtype=np.uint8)
81 |       gt = np.asarray(
82 |           Image.open(gtname).convert(mode='L'),
83 |           dtype=np.uint8)
84 |       inst = np.asarray(
85 |           Image.open(instname).convert(mode='P'),
86 |           dtype=np.uint8)
87 | 
88 |       # Compute true-positive, false-positive
89 |       # and false-negative
90 |       _tp_fn, _tp_fp, _tp = iou_stats(
91 |           pred,
92 |           gt,
93 |           num_classes=args.num_classes,
94 |           background=0)
95 | 
96 |       # Compute num. of instances per class
97 |       inst_inds = np.unique(inst)
98 |       ninst_ = np.zeros(args.num_classes, dtype=np.float64)
99 |       for i in range(inst_inds.size):
100 |         inst_ind = inst_inds[i]
101 |         if inst_ind < 255:  # Skip the 'ignore' instance id 255.
102 |           inst_mask = inst == inst_ind
103 |           seg_mask = gt[inst_mask]
104 |           npixel, _ = np.histogram(
105 |               seg_mask, bins=args.num_classes,
106 |               range=(0, args.num_classes-1))  # num. pixel per class of this inst.
107 |           cls = np.argmax(npixel)
108 |           ninst_[cls] += 1
109 | 
110 | 
111 |       iou_ = _tp / (_tp_fn + _tp_fp - _tp + 1e-12)
112 |       iou += iou_ * ninst_
113 |       ninst += ninst_
114 | 
115 |   iou /= ninst + 1e-12
116 |   iou *= 100
117 | 
118 |   if args.num_classes == 15:
119 |     # MSCOCO-Densepose
120 |     class_names = ['Background', 'Torso', 'R. Hand', 'L. Hand',
121 |                    'L. Foot', 'R. Foot', 'R. Thigh', 'L. Thigh',
122 |                    'R. Leg', 'L. Leg', 'L. Arm', 'R. Arm',
123 |                    'L. Forearm', 'R. 
Forearm', 'Head'] 124 | elif args.num_classes == 21: 125 | # VOC12 126 | class_names = ['Background', 'Aero', 'Bike', 'Bird', 'Boat', 127 | 'Bottle', 'Bus', 'Car', 'Cat', 'Chair','Cow', 128 | 'Table', 'Dog', 'Horse' ,'MBike', 'Person', 129 | 'Plant', 'Sheep', 'Sofa', 'Train', 'TV'] 130 | else: 131 | raise NotImplementedError() 132 | 133 | 134 | for i in range(args.num_classes): 135 | if i >= len(class_names): 136 | break 137 | print('class {:10s}: {:02d}, acc: {:4.4f}%'.format( 138 | class_names[i], i, iou[i])) 139 | mean_iou = iou.sum() / args.num_classes 140 | print('mean IOU: {:4.4f}%'.format(mean_iou)) 141 | 142 | if __name__ == '__main__': 143 | main() 144 | -------------------------------------------------------------------------------- /spml/models/backbones/resnet.py: -------------------------------------------------------------------------------- 1 | """Construct Residual Network.""" 2 | 3 | import math 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.nn.modules.batchnorm import _BatchNorm 9 | 10 | 11 | class Bottleneck(nn.Module): 12 | 13 | expansion = 4 14 | 15 | def __init__(self, inplanes, planes, stride=1, 16 | dilation=1, downsample=None): 17 | super(Bottleneck, self).__init__() 18 | self.conv1 = nn.Conv2d(inplanes, 19 | planes, 20 | kernel_size=1, 21 | bias=False) 22 | self.bn1 = nn.BatchNorm2d(planes, momentum=3e-4) 23 | 24 | self.conv2 = nn.Conv2d(planes, 25 | planes, 26 | kernel_size=3, 27 | stride=stride, 28 | padding=dilation, 29 | dilation=dilation, bias=False) 30 | self.bn2 = nn.BatchNorm2d(planes, momentum=3e-4) 31 | self.conv3 = nn.Conv2d(planes, 32 | planes * Bottleneck.expansion, 33 | kernel_size=1, 34 | bias=False) 35 | self.bn3 = nn.BatchNorm2d(planes * Bottleneck.expansion, 36 | momentum=3e-4) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.downsample = downsample 39 | self.dilation = dilation 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | 44 | residual = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | out = self.relu(out) 53 | 54 | out = self.conv3(out) 55 | out = self.bn3(out) 56 | 57 | if self.downsample is not None: 58 | residual = self.downsample(x) 59 | 60 | out += residual 61 | out = self.relu(out) 62 | 63 | return out 64 | 65 | 66 | class conv1(nn.Module): 67 | 68 | def __init__(self): 69 | 70 | super(conv1, self).__init__() 71 | self.inplanes = 128 72 | 73 | #self.conv1 = nn.Conv2d(3, 74 | # 64, 75 | # kernel_size=7, 76 | # stride=2, 77 | # padding=3, 78 | # bias=False) 79 | self.conv1 = nn.Sequential( 80 | nn.Conv2d(3, 64, 81 | kernel_size=3, 82 | stride=2, 83 | padding=1, 84 | bias=False), 85 | nn.BatchNorm2d(64, momentum=3e-4), 86 | nn.ReLU(inplace=True), 87 | nn.Conv2d(64, 64, 88 | kernel_size=3, 89 | stride=1, 90 | padding=1, 91 | bias=False), 92 | nn.BatchNorm2d(64, momentum=3e-4), 93 | nn.ReLU(inplace=True), 94 | nn.Conv2d(64, 128, 95 | kernel_size=3, 96 | stride=1, 97 | padding=1, 98 | bias=False)) 99 | self.bn1 = nn.BatchNorm2d(128, momentum=3e-4) 100 | self.relu = nn.ReLU(inplace=True) 101 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 102 | 103 | 104 | def forward(self, x): 105 | 106 | x = self.conv1(x) 107 | x = self.bn1(x) 108 | x = self.relu(x) 109 | x = self.maxpool(x) 110 | return x 111 | 112 | 113 | class ResnetBackbone(nn.Module): 114 | 115 | def __init__(self, blocks, strides, dilations, config): 116 | 117 | super(ResnetBackbone, self).__init__() 118 | 
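# `blocks`, `strides` and `dilations` below are per-stage settings for
# res2..res5. As a sketch (hypothetical config values): a dilated ResNet-101
# would use blocks=[3, 4, 23, 3] with strides=[1, 2, 1, 1] and
# dilations=[1, 1, 2, 4], keeping the output stride at 8 after the stride-4
# stem above; the exact values come from the config passed in.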
119 | self.inplanes = 128 120 | self.conv1 = conv1() 121 | 122 | self.res2 = self._make_layer( 123 | Bottleneck, 64, blocks[0], stride=strides[0], dilation=dilations[0]) 124 | self.res3 = self._make_layer( 125 | Bottleneck, 128, blocks[1], stride=strides[1], dilation=dilations[1]) 126 | 127 | self.res4 = self._make_layer( 128 | Bottleneck, 256, blocks[2], stride=strides[2], dilation=dilations[2]) 129 | self.res5 = self._make_layer( 130 | Bottleneck, 512, blocks[3], stride=strides[3], dilation=dilations[3]) 131 | 132 | for m in self.modules(): 133 | if isinstance(m, nn.Conv2d): 134 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 135 | m.weight.data.normal_(0, math.sqrt(2. / n)) 136 | elif isinstance(m, _BatchNorm): 137 | m.weight.data.fill_(1) 138 | if m.bias is not None: 139 | m.bias.data.zero_() 140 | 141 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, 142 | grids=None): 143 | downsample = None 144 | if stride != 1 or self.inplanes != planes * block.expansion: 145 | downsample = nn.Sequential( 146 | nn.Conv2d(self.inplanes, planes * block.expansion, 147 | kernel_size=1, stride=stride, bias=False), 148 | nn.BatchNorm2d(planes * block.expansion, momentum=3e-4)) 149 | 150 | layers = [] 151 | if grids is None: 152 | grids = [1] * blocks 153 | 154 | if dilation == 1 or dilation == 2: 155 | layers.append(block(self.inplanes, planes, stride, dilation=1, 156 | downsample=downsample,)) 157 | elif dilation == 4: 158 | layers.append(block(self.inplanes, planes, stride, dilation=2, 159 | downsample=downsample,)) 160 | else: 161 | raise RuntimeError('=> unknown dilation size: {}'.format(dilation)) 162 | 163 | self.inplanes = planes * block.expansion 164 | for i in range(1, blocks): 165 | layers.append(block(self.inplanes, planes, 166 | dilation=dilation*grids[i],)) 167 | 168 | return nn.Sequential(*layers) 169 | 170 | def forward(self, x): 171 | conv1 = self.conv1(x) 172 | 173 | res2 = self.res2(conv1) 174 | res3 = self.res3(res2) 175 | res4 = self.res4(res3) 176 | res5 = self.res5(res4) 177 | 178 | return res2, res3, res4, res5 179 | -------------------------------------------------------------------------------- /pyscripts/inference/inference_softmax.py: -------------------------------------------------------------------------------- 1 | """Inference script for semantic segmentation by softmax classifier. 2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | import math 6 | 7 | import PIL.Image as Image 8 | import numpy as np 9 | import cv2 10 | import torch 11 | import torch.backends.cudnn as cudnn 12 | 13 | import spml.data.transforms as transforms 14 | import spml.utils.general.vis as vis_utils 15 | from spml.data.datasets.base_dataset import ListDataset 16 | from spml.config.default import config 17 | from spml.config.parse_args import parse_args 18 | from spml.models.embeddings.resnet_pspnet import resnet_50_pspnet, resnet_101_pspnet 19 | from spml.models.embeddings.resnet_deeplab import resnet_50_deeplab, resnet_101_deeplab 20 | from spml.models.predictions.softmax_classifier import softmax_classifier 21 | 22 | cudnn.enabled = True 23 | cudnn.benchmark = True 24 | 25 | 26 | def main(): 27 | """Inference for semantic segmentation. 28 | """ 29 | # Retreve experiment configurations. 30 | args = parse_args('Inference for semantic segmentation.') 31 | 32 | # Create directories to save results. 
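# The two folders below hold the same predictions in two forms:
# <save_dir>/semantic_gray stores per-pixel class indices as 8-bit PNGs
# (the form consumed by the benchmark scripts), while <save_dir>/semantic_color
# stores the colorized visualization produced via `color_map`.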
33 | semantic_dir = os.path.join(args.save_dir, 'semantic_gray') 34 | semantic_rgb_dir = os.path.join(args.save_dir, 'semantic_color') 35 | if not os.path.isdir(semantic_dir): 36 | os.makedirs(semantic_dir) 37 | if not os.path.isdir(semantic_rgb_dir): 38 | os.makedirs(semantic_rgb_dir) 39 | 40 | # Create color map. 41 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 42 | color_map = color_map.numpy() 43 | 44 | # Create data loaders. 45 | test_dataset = ListDataset( 46 | data_dir=args.data_dir, 47 | data_list=args.data_list, 48 | img_mean=config.network.pixel_means, 49 | img_std=config.network.pixel_stds, 50 | size=None, 51 | random_crop=False, 52 | random_scale=False, 53 | random_mirror=False, 54 | training=False) 55 | test_image_paths = test_dataset.image_paths 56 | 57 | # Create models. 58 | if config.network.backbone_types == 'panoptic_pspnet_101': 59 | embedding_model = resnet_101_pspnet(config).cuda() 60 | elif config.network.backbone_types == 'panoptic_deeplab_101': 61 | embedding_model = resnet_101_deeplab(config).cuda() 62 | else: 63 | raise ValueError('Not support ' + config.network.backbone_types) 64 | 65 | prediction_model = softmax_classifier(config).cuda() 66 | embedding_model.eval() 67 | prediction_model.eval() 68 | 69 | # Load trained weights. 70 | model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth') 71 | save_iter = config.train.max_iteration - 1 72 | embedding_model.load_state_dict( 73 | torch.load(model_path_template.format(save_iter))['embedding_model'], 74 | resume=True) 75 | prediction_model.load_state_dict( 76 | torch.load(model_path_template.format(save_iter))['prediction_model']) 77 | 78 | 79 | # Start inferencing. 80 | for data_index in range(len(test_dataset)): 81 | # Image path. 82 | image_path = test_image_paths[data_index] 83 | base_name = os.path.basename(image_path).replace('.jpg', '.png') 84 | 85 | # Image resolution. 86 | image_batch, _, _ = test_dataset[data_index] 87 | image_h, image_w = image_batch['image'].shape[-2:] 88 | 89 | # Resize the input image. 90 | if config.test.image_size > 0: 91 | image_batch['image'] = transforms.resize_with_interpolation( 92 | image_batch['image'].transpose(1, 2, 0), 93 | config.test.image_size, 94 | method='bilinear').transpose(2, 0, 1) 95 | resize_image_h, resize_image_w = image_batch['image'].shape[-2:] 96 | 97 | # Crop and Pad the input image. 98 | image_batch['image'] = transforms.resize_with_pad( 99 | image_batch['image'].transpose(1, 2, 0), 100 | config.test.crop_size, 101 | image_pad_value=0).transpose(2, 0, 1) 102 | image_batch['image'] = torch.FloatTensor(image_batch['image'][np.newaxis, ...]).cuda() 103 | pad_image_h, pad_image_w = image_batch['image'].shape[-2:] 104 | 105 | # Create the ending index of each patch. 106 | stride_h, stride_w = config.test.stride 107 | crop_h, crop_w = config.test.crop_size 108 | npatches_h = math.ceil(1.0 * (pad_image_h-crop_h) / stride_h) + 1 109 | npatches_w = math.ceil(1.0 * (pad_image_w-crop_w) / stride_w) + 1 110 | patch_ind_h = np.linspace( 111 | crop_h, pad_image_h, npatches_h, dtype=np.int32) 112 | patch_ind_w = np.linspace( 113 | crop_w, pad_image_w, npatches_w, dtype=np.int32) 114 | 115 | # Create place holder for full-resolution embeddings. 
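# Worked example of the tiling arithmetic below (hypothetical sizes): with
# pad_image_h = 768, crop_h = 512 and stride_h = 256, we get
# npatches_h = ceil((768 - 512) / 256) + 1 = 2 and
# patch_ind_h = linspace(512, 768, 2) = [512, 768], i.e. vertical windows
# covering rows [0:512] and [256:768] of the padded image.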
116 | outputs = {} 117 | with torch.no_grad(): 118 | for ind_h in patch_ind_h: 119 | for ind_w in patch_ind_w: 120 | sh, eh = ind_h - crop_h, ind_h 121 | sw, ew = ind_w - crop_w, ind_w 122 | crop_image_batch = { 123 | k: v[:, :, sh:eh, sw:ew] for k, v in image_batch.items()} 124 | 125 | # Feed-forward. 126 | crop_embeddings = embedding_model( 127 | crop_image_batch, resize_as_input=True) 128 | crop_outputs = prediction_model(crop_embeddings) 129 | 130 | for name, crop_out in crop_outputs.items(): 131 | 132 | if crop_out is not None: 133 | if name not in outputs.keys(): 134 | output_shape = list(crop_out.shape) 135 | output_shape[-2:] = pad_image_h, pad_image_w 136 | outputs[name] = torch.zeros(output_shape, dtype=crop_out.dtype).cuda() 137 | outputs[name][..., sh:eh, sw:ew] += crop_out 138 | 139 | # Save semantic predictions. 140 | semantic_logits = outputs.get('semantic_logit', None) 141 | if semantic_logits is not None: 142 | semantic_pred = torch.argmax(semantic_logits, 1) 143 | semantic_pred = (semantic_pred.view(pad_image_h, pad_image_w) 144 | .cpu() 145 | .data 146 | .numpy() 147 | .astype(np.uint8)) 148 | semantic_pred = semantic_pred[:resize_image_h, :resize_image_w] 149 | semantic_pred = cv2.resize( 150 | semantic_pred, 151 | (image_w, image_h), 152 | interpolation=cv2.INTER_NEAREST) 153 | 154 | semantic_pred_name = os.path.join(semantic_dir, base_name) 155 | Image.fromarray(semantic_pred, mode='L').save(semantic_pred_name) 156 | 157 | semantic_pred_rgb = color_map[semantic_pred] 158 | semantic_pred_rgb_name = os.path.join(semantic_rgb_dir, base_name) 159 | Image.fromarray(semantic_pred_rgb, mode='RGB').save( 160 | semantic_pred_rgb_name) 161 | 162 | # Clean GPU memory cache to save more space. 163 | outputs = {} 164 | crop_embeddings = {} 165 | crop_outputs = {} 166 | torch.cuda.empty_cache() 167 | 168 | 169 | if __name__ == '__main__': 170 | main() 171 | -------------------------------------------------------------------------------- /pyscripts/inference/inference_softmax_msc.py: -------------------------------------------------------------------------------- 1 | """Inference script for semantic segmentation by softmax classifier. 2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | import math 6 | 7 | import PIL.Image as Image 8 | import numpy as np 9 | import cv2 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.backends.cudnn as cudnn 13 | from tqdm import tqdm 14 | 15 | import spml.data.transforms as transforms 16 | import spml.utils.general.vis as vis_utils 17 | import spml.utils.general.others as other_utils 18 | from spml.data.datasets.base_dataset import ListDataset 19 | from spml.config.default import config 20 | from spml.config.parse_args import parse_args 21 | from spml.models.embeddings.resnet_pspnet import resnet_50_pspnet, resnet_101_pspnet 22 | from spml.models.embeddings.resnet_deeplab import resnet_50_deeplab, resnet_101_deeplab 23 | from spml.models.predictions.softmax_classifier import softmax_classifier 24 | 25 | cudnn.enabled = True 26 | cudnn.benchmark = True 27 | 28 | 29 | def main(): 30 | """Inference for semantic segmentation. 31 | """ 32 | # Retreve experiment configurations. 33 | args = parse_args('Inference for semantic segmentation.') 34 | 35 | # Create directories to save results. 
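# Compared with inference_softmax.py, this multi-scale variant evaluates each
# image at scales {0.5, 0.75, 1, 1.25, 1.5} plus horizontal flips (so up to
# 10 forward passes per image) and fuses the per-scale softmax maps by
# summation before the final argmax.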
36 | semantic_dir = os.path.join(args.save_dir, 'semantic_gray') 37 | semantic_rgb_dir = os.path.join(args.save_dir, 'semantic_color') 38 | os.makedirs(semantic_dir, exist_ok=True) 39 | os.makedirs(semantic_rgb_dir, exist_ok=True) 40 | 41 | # Create color map. 42 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 43 | color_map = color_map.numpy() 44 | 45 | # Create data loaders. 46 | test_dataset = ListDataset( 47 | data_dir=args.data_dir, 48 | data_list=args.data_list, 49 | img_mean=config.network.pixel_means, 50 | img_std=config.network.pixel_stds, 51 | size=None, 52 | random_crop=False, 53 | random_scale=False, 54 | random_mirror=False, 55 | training=False) 56 | test_image_paths = test_dataset.image_paths 57 | 58 | # Create models. 59 | if config.network.backbone_types == 'panoptic_pspnet_101': 60 | embedding_model = resnet_101_pspnet(config).cuda() 61 | elif config.network.backbone_types == 'panoptic_deeplab_101': 62 | embedding_model = resnet_101_deeplab(config).cuda() 63 | else: 64 | raise ValueError('Not support ' + config.network.backbone_types) 65 | 66 | prediction_model = softmax_classifier(config).cuda() 67 | embedding_model.eval() 68 | prediction_model.eval() 69 | 70 | # Load trained weights. 71 | model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth') 72 | save_iter = config.train.max_iteration - 1 73 | embedding_model.load_state_dict( 74 | torch.load(model_path_template.format(save_iter))['embedding_model'], 75 | resume=True) 76 | prediction_model.load_state_dict( 77 | torch.load(model_path_template.format(save_iter))['prediction_model']) 78 | 79 | 80 | # Start inferencing. 81 | with torch.no_grad(): 82 | for data_index in tqdm(range(len(test_dataset))): 83 | # Image path. 84 | image_path = test_image_paths[data_index] 85 | base_name = os.path.basename(image_path).replace('.jpg', '.png') 86 | 87 | # Image resolution. 88 | image_batch, label_batch, _ = test_dataset[data_index] 89 | image_h, image_w = image_batch['image'].shape[-2:] 90 | batches = other_utils.create_image_pyramid( 91 | image_batch, label_batch, 92 | scales=[0.5, 0.75, 1, 1.25, 1.5], 93 | is_flip=True) 94 | 95 | semantic_logits = [] 96 | for image_batch, label_batch, data_info in batches: 97 | resize_image_h, resize_image_w = image_batch['image'].shape[-2:] 98 | # Crop and Pad the input image. 99 | image_batch['image'] = transforms.resize_with_pad( 100 | image_batch['image'].transpose(1, 2, 0), 101 | config.test.crop_size, 102 | image_pad_value=0).transpose(2, 0, 1) 103 | image_batch['image'] = torch.FloatTensor( 104 | image_batch['image'][np.newaxis, ...]).cuda() 105 | pad_image_h, pad_image_w = image_batch['image'].shape[-2:] 106 | 107 | # Create the ending index of each patch. 108 | stride_h, stride_w = config.test.stride 109 | crop_h, crop_w = config.test.crop_size 110 | npatches_h = math.ceil(1.0 * (pad_image_h-crop_h) / stride_h) + 1 111 | npatches_w = math.ceil(1.0 * (pad_image_w-crop_w) / stride_w) + 1 112 | patch_ind_h = np.linspace( 113 | crop_h, pad_image_h, npatches_h, dtype=np.int32) 114 | patch_ind_w = np.linspace( 115 | crop_w, pad_image_w, npatches_w, dtype=np.int32) 116 | 117 | # Create place holder for full-resolution embeddings. 
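# Unlike the single-scale script, overlapping windows here are averaged
# rather than summed: `semantic_logit` accumulates raw logits while `counts`
# records how many windows cover each pixel, and the later division turns
# the accumulated sum into a per-pixel mean.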
118 | semantic_logit = torch.FloatTensor( 119 | 1, config.dataset.num_classes, pad_image_h, pad_image_w).zero_().to("cuda:0") 120 | counts = torch.FloatTensor( 121 | 1, 1, pad_image_h, pad_image_w).zero_().to("cuda:0") 122 | for ind_h in patch_ind_h: 123 | for ind_w in patch_ind_w: 124 | sh, eh = ind_h - crop_h, ind_h 125 | sw, ew = ind_w - crop_w, ind_w 126 | crop_image_batch = { 127 | k: v[:, :, sh:eh, sw:ew] for k, v in image_batch.items()} 128 | 129 | # Feed-forward. 130 | crop_embeddings = embedding_model( 131 | crop_image_batch, resize_as_input=True) 132 | crop_outputs = prediction_model(crop_embeddings) 133 | semantic_logit[..., sh:eh, sw:ew] += crop_outputs['semantic_logit'].to("cuda:0") 134 | counts[..., sh:eh, sw:ew] += 1 135 | semantic_logit /= counts 136 | semantic_logit = semantic_logit[..., :resize_image_h, :resize_image_w] 137 | semantic_logit = F.interpolate( 138 | semantic_logit, size=(image_h, image_w), mode='bilinear') 139 | semantic_logit = F.softmax(semantic_logit, dim=1) 140 | semantic_logit = semantic_logit.data.cpu().numpy().astype(np.float32) 141 | if data_info['is_flip']: 142 | semantic_logit = semantic_logit[..., ::-1] 143 | semantic_logits.append(semantic_logit) 144 | 145 | # Save semantic predictions. 146 | semantic_logits = np.concatenate(semantic_logits, axis=0) 147 | semantic_logits = np.sum(semantic_logits, axis=0) 148 | if semantic_logits is not None: 149 | semantic_pred = np.argmax(semantic_logits, axis=0).astype(np.uint8) 150 | 151 | semantic_pred_name = os.path.join(semantic_dir, base_name) 152 | Image.fromarray(semantic_pred, mode='L').save(semantic_pred_name) 153 | 154 | semantic_pred_rgb = color_map[semantic_pred] 155 | semantic_pred_rgb_name = os.path.join(semantic_rgb_dir, base_name) 156 | Image.fromarray(semantic_pred_rgb, mode='RGB').save( 157 | semantic_pred_rgb_name) 158 | 159 | # Clean GPU memory cache to save more space. 160 | outputs = {} 161 | crop_embeddings = {} 162 | crop_outputs = {} 163 | torch.cuda.empty_cache() 164 | 165 | 166 | if __name__ == '__main__': 167 | main() 168 | -------------------------------------------------------------------------------- /spml/data/transforms.py: -------------------------------------------------------------------------------- 1 | """Utility functions to process images. 2 | """ 3 | 4 | import numpy as np 5 | import cv2 6 | 7 | 8 | def resize(image, 9 | label, 10 | ratio, 11 | image_method='bilinear', 12 | label_method='nearest'): 13 | """Rescale image and label to the same size by the specified ratio. 14 | The aspect ratio is remained the same after rescaling. 15 | 16 | Args: 17 | image: A 2-D/3-D tensor of shape `[height, width, channels]`. 18 | label: A 2-D/3-D tensor of shape `[height, width, channels]`. 19 | ratio: A float/integer indicates the scaling ratio. 20 | image_method: Image resizing method. bilinear/nearest. 21 | label_method: Image resizing method. bilinear/nearest. 22 | 23 | Return: 24 | Two tensors of shape `[new_height, new_width, channels]`. 
25 | """ 26 | h, w = image.shape[:2] 27 | new_h, new_w = int(ratio * h), int(ratio * w) 28 | 29 | inter_image = (cv2.INTER_LINEAR if image_method == 'bilinear' 30 | else cv2.INTER_NEAREST) 31 | new_image = cv2.resize(image, (new_w, new_h), interpolation=inter_image) 32 | 33 | inter_label = (cv2.INTER_LINEAR if label_method == 'bilinear' 34 | else cv2.INTER_NEAREST) 35 | new_label = cv2.resize(label, (new_w, new_h), interpolation=inter_label) 36 | 37 | return new_image, new_label 38 | 39 | 40 | def random_resize(image, 41 | label, 42 | scale_min=1.0, 43 | scale_max=1.0, 44 | image_method='bilinear', 45 | label_method='nearest'): 46 | """Randomly rescale image and label to the same size. The 47 | aspect ratio is remained the same after rescaling. 48 | 49 | Args: 50 | image: A 2-D/3-D tensor of shape `[height, width, channels]`. 51 | label: A 2-D/3-D tensor of shape `[height, width, channels]`. 52 | scale_min: A float indicates the minimum scaling ratio. 53 | scale_max: A float indicates the maximum scaling ratio. 54 | image_method: Image resizing method. bilinear/nearest. 55 | label_method: Image resizing method. bilinear/nearest. 56 | 57 | Return: 58 | Two tensors of shape `[new_height, new_width, channels]`. 59 | """ 60 | assert(scale_max >= scale_min) 61 | ratio = np.random.uniform(scale_min, scale_max) 62 | return resize(image, label, ratio, image_method, label_method) 63 | 64 | 65 | def mirror(image, label): 66 | """Horizontally flipp image and label. 67 | 68 | Args: 69 | image: A 2-D/3-D tensor of shape `[height, width, channels]`. 70 | label: A 2-D/3-D tensor of shape `[height, width, channels]`. 71 | 72 | Return: 73 | Two tensors of shape `[new_height, new_width, channels]`. 74 | """ 75 | 76 | image = image[:, ::-1, ...] 77 | label = label[:, ::-1, ...] 78 | return image, label 79 | 80 | 81 | def random_mirror(image, label): 82 | """Randomly horizontally flipp image and label. 83 | 84 | Args: 85 | image: A 2-D/3-D tensor of shape `[height, width, channels]`. 86 | label: A 2-D/3-D tensor of shape `[height, width, channels]`. 87 | 88 | Return: 89 | Two tensors of shape `[new_height, new_width, channels]`. 90 | """ 91 | is_flip = np.random.uniform(0, 1.0) >= 0.5 92 | if is_flip: 93 | image, label = mirror(image, label) 94 | 95 | return image, label 96 | 97 | 98 | def resize_with_interpolation(image, larger_size, method='bilinear'): 99 | """Rescale image with larger size as `larger_size`. The aspect 100 | ratio is remained the same after rescaling. 101 | 102 | Args: 103 | image: A 2-D/3-D tensor of shape `[height, width, channels]`. 104 | larger_size: An interger indicates the target size of larger side. 105 | method: Image resizing method. bilinear/nearest. 106 | 107 | Return: 108 | A tensor of shape `[new_height, new_width, channels]`. 109 | """ 110 | h, w = image.shape[:2] 111 | new_size = float(larger_size) 112 | ratio = np.minimum(new_size / h, new_size / w) 113 | new_h, new_w = int(ratio * h), int(ratio * w) 114 | 115 | inter = (cv2.INTER_LINEAR if method == 'bilinear' 116 | else cv2.INTER_NEAREST) 117 | new_image = cv2.resize(image, (new_w, new_h), interpolation=inter) 118 | 119 | return new_image 120 | 121 | 122 | def resize_with_pad(image, size, image_pad_value=0, pad_mode='left_top'): 123 | """Upscale image by pad to the width and height. 124 | 125 | Args: 126 | image: A 2-D/3-D tensor of shape `[height, width, channels]`. 127 | size: A tuple of integers indicates the target size. 128 | image_pad_value: An integer indicates the padding value. 129 | pad_mode: Padding mode. 
left_top/center. 130 | 131 | Return: 132 | A tensor of shape `[new_height, new_width, channels]`. 133 | """ 134 | h, w = image.shape[:2] 135 | new_shape = list(image.shape) 136 | new_shape[0] = h if h > size[0] else size[0] 137 | new_shape[1] = w if w > size[1] else size[1] 138 | pad_image = np.zeros(new_shape, dtype=image.dtype) 139 | 140 | if isinstance(image_pad_value, int) or isinstance(image_pad_value, float): 141 | pad_image.fill(image_pad_value) 142 | else: 143 | for ind_ch, val in enumerate(image_pad_value): 144 | pad_image[:, :, ind_ch].fill(val) 145 | 146 | if pad_mode == 'center': 147 | s_y = (new_shape[0] - h) // 2 148 | s_x = (new_shape[1] - w) // 2 149 | pad_image[s_y:s_y+h, s_x:s_x+w, ...] = image 150 | elif pad_mode == 'left_top': 151 | pad_image[:h, :w, ...] = image 152 | else: 153 | raise ValueError('Unsupported padding mode') 154 | 155 | return pad_image 156 | 157 | 158 | def random_crop_with_pad(image, 159 | label, 160 | crop_size, 161 | image_pad_value=0, 162 | label_pad_value=255, 163 | pad_mode='left_top', 164 | return_bbox=False): 165 | """Randomly crop image and label, and pad them before cropping 166 | if the size is smaller than `crop_size`. 167 | 168 | Args: 169 | image: A 2-D/3-D tensor of shape `[height, width, channels]`. 170 | label: A 2-D/3-D tensor of shape `[height, width, channels]`. 171 | crop_size: A tuple of integers indicates the cropped size. 172 | image_pad_value: An integer indicates the padding value. 173 | label_pad_value: An integer indicates the padding value. 174 | pad_mode: Padding mode. left_top/center. 175 | 176 | Return: 177 | Two tensors of shape `[new_height, new_width, channels]`. 178 | """ 179 | image = resize_with_pad(image, crop_size, 180 | image_pad_value, pad_mode) 181 | label = resize_with_pad(label, crop_size, 182 | label_pad_value, pad_mode) 183 | 184 | h, w = image.shape[:2] 185 | start_h = int(np.floor(np.random.uniform(0, h - crop_size[0]))) 186 | start_w = int(np.floor(np.random.uniform(0, w - crop_size[1]))) 187 | end_h = start_h + crop_size[0] 188 | end_w = start_w + crop_size[1] 189 | 190 | crop_image = image[start_h:end_h, start_w:end_w, ...] 191 | crop_label = label[start_h:end_h, start_w:end_w, ...] 192 | 193 | if return_bbox: 194 | bbox = [start_w, start_h, end_w, end_h] 195 | return crop_image, crop_label, bbox 196 | else: 197 | return crop_image, crop_label 198 | -------------------------------------------------------------------------------- /pyscripts/train/train_densepose_classifier.py: -------------------------------------------------------------------------------- 1 | """Script for training softmax classifier only for DensePose. 
2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | 6 | import torch 7 | import torch.utils.data 8 | import torch.backends.cudnn as cudnn 9 | import torch.nn.parallel.scatter_gather as scatter_gather 10 | import tensorboardX 11 | from tqdm import tqdm 12 | 13 | from lib.nn.parallel.data_parallel import DataParallel 14 | from lib.nn.optimizer import SGD 15 | from spml.config.default import config 16 | from spml.config.parse_args import parse_args 17 | import spml.utils.general.train as train_utils 18 | import spml.utils.general.vis as vis_utils 19 | import spml.utils.general.others as other_utils 20 | import spml.models.utils as model_utils 21 | from spml.data.datasets.densepose_dataset import DenseposeClassifierDataset 22 | from spml.models.embeddings.resnet_pspnet_densepose import resnet_50_pspnet, resnet_101_pspnet 23 | from spml.models.predictions.softmax_classifier import softmax_classifier 24 | 25 | torch.cuda.manual_seed_all(235) 26 | torch.manual_seed(235) 27 | 28 | cudnn.enabled = True 29 | cudnn.benchmark = True 30 | 31 | 32 | def main(): 33 | """Training for softmax classifier only on DensePose. 34 | """ 35 | # Retreve experiment configurations. 36 | args = parse_args('Training for softmax classifier only on DensePose.') 37 | 38 | # Retrieve GPU informations. 39 | device_ids = [int(i) for i in config.gpus.split(',')] 40 | gpu_ids = [torch.device('cuda', i) for i in device_ids] 41 | num_gpus = len(gpu_ids) 42 | 43 | # Create logger and tensorboard writer. 44 | summary_writer = tensorboardX.SummaryWriter(logdir=args.snapshot_dir) 45 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 46 | 47 | model_path_template = os.path.join(args.snapshot_dir, 48 | 'model-{:d}.pth') 49 | optimizer_path_template = os.path.join(args.snapshot_dir, 50 | 'model-{:d}.state.pth') 51 | 52 | # Create data loaders. 53 | train_dataset = DenseposeClassifierDataset( 54 | data_dir=args.data_dir, 55 | data_list=args.data_list, 56 | img_mean=config.network.pixel_means, 57 | img_std=config.network.pixel_stds, 58 | size=config.train.crop_size, 59 | random_crop=config.train.random_crop, 60 | random_scale=config.train.random_scale, 61 | random_mirror=config.train.random_mirror, 62 | random_grayscale=True, 63 | random_blur=True, 64 | training=True) 65 | 66 | train_loader = torch.utils.data.DataLoader( 67 | train_dataset, 68 | batch_size=config.train.batch_size, 69 | shuffle=config.train.shuffle, 70 | num_workers=num_gpus * config.num_threads, 71 | drop_last=False, 72 | collate_fn=train_dataset.collate_fn) 73 | 74 | # Create models. 75 | if config.network.backbone_types == 'panoptic_pspnet_101': 76 | embedding_model = resnet_101_pspnet(config).cuda() 77 | else: 78 | raise ValueError('Not support ' + config.network.backbone_types) 79 | 80 | if config.network.prediction_types == 'softmax_classifier': 81 | prediction_model = softmax_classifier(config).cuda() 82 | else: 83 | raise ValueError('Not support ' + config.network.prediction_types) 84 | 85 | # Use customized optimizer and pass lr=1 to support different lr for 86 | # different weights. 87 | optimizer = SGD( 88 | embedding_model.get_params_lr() + prediction_model.get_params_lr(), 89 | lr=1, 90 | momentum=config.train.momentum, 91 | weight_decay=config.train.weight_decay) 92 | optimizer.zero_grad() 93 | 94 | # Load pre-trained weights. 
95 | curr_iter = config.train.begin_iteration 96 | if config.network.pretrained: 97 | print('Loading pre-trained model: {:s}'.format(config.network.pretrained)) 98 | embedding_model.load_state_dict( 99 | torch.load(config.network.pretrained)['embedding_model'], 100 | resume=True) 101 | else: 102 | raise ValueError('Pre-trained model is required.') 103 | 104 | # Distribute model weights to multi-gpus. 105 | embedding_model = DataParallel(embedding_model, 106 | device_ids=device_ids, 107 | gather_output=False) 108 | prediction_model = DataParallel(prediction_model, 109 | device_ids=device_ids, 110 | gather_output=False) 111 | 112 | embedding_model.eval() 113 | prediction_model.train() 114 | print(embedding_model) 115 | print(prediction_model) 116 | 117 | # Create memory bank. 118 | memory_banks = {} 119 | 120 | # start training 121 | train_iterator = train_loader.__iter__() 122 | iterator_index = 0 123 | pbar = tqdm(range(curr_iter, config.train.max_iteration)) 124 | for curr_iter in pbar: 125 | # Check if the rest of datas is enough to iterate through; 126 | # otherwise, re-initiate the data iterator. 127 | if iterator_index + num_gpus >= len(train_loader): 128 | train_iterator = train_loader.__iter__() 129 | iterator_index = 0 130 | 131 | # Feed-forward. 132 | image_batch, label_batch = other_utils.prepare_datas_and_labels_mgpu( 133 | train_iterator, gpu_ids) 134 | iterator_index += num_gpus 135 | 136 | # Generate embeddings, clustering and prototypes. 137 | with torch.no_grad(): 138 | embeddings = embedding_model(*zip(image_batch, label_batch)) 139 | 140 | # Compute loss. 141 | outputs = prediction_model(*zip(embeddings, label_batch)) 142 | outputs = scatter_gather.gather(outputs, gpu_ids[0]) 143 | losses = [] 144 | for k in ['sem_ann_loss']: 145 | loss = outputs.get(k, None) 146 | if loss is not None: 147 | outputs[k] = loss.mean() 148 | losses.append(outputs[k]) 149 | loss = sum(losses) 150 | acc = outputs['accuracy'].mean() 151 | 152 | # Backward propogation. 153 | if config.train.lr_policy == 'step': 154 | lr = train_utils.lr_step(config.train.base_lr, 155 | curr_iter, 156 | config.train.decay_iterations, 157 | config.train.warmup_iteration) 158 | else: 159 | lr = train_utils.lr_poly(config.train.base_lr, 160 | curr_iter, 161 | config.train.max_iteration, 162 | config.train.warmup_iteration) 163 | 164 | optimizer.zero_grad() 165 | loss.backward() 166 | optimizer.step(lr) 167 | 168 | # Snapshot the trained model. 169 | if ((curr_iter+1) % config.train.snapshot_step == 0 170 | or curr_iter == config.train.max_iteration - 1): 171 | model_state_dict = { 172 | 'embedding_model': embedding_model.module.state_dict(), 173 | 'prediction_model': prediction_model.module.state_dict()} 174 | torch.save(model_state_dict, 175 | model_path_template.format(curr_iter)) 176 | torch.save(optimizer.state_dict(), 177 | optimizer_path_template.format(curr_iter)) 178 | 179 | # Print loss in the progress bar. 180 | line = 'loss = {:.3f}, acc = {:.3f}, lr = {:.6f}'.format( 181 | loss.item(), acc.item(), lr) 182 | pbar.set_description(line) 183 | 184 | 185 | if __name__ == '__main__': 186 | main() 187 | -------------------------------------------------------------------------------- /pyscripts/train/train_classifier.py: -------------------------------------------------------------------------------- 1 | """Script for training softmax classifier only. 
2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | 6 | import torch 7 | import torch.utils.data 8 | import torch.backends.cudnn as cudnn 9 | import torch.nn.parallel.scatter_gather as scatter_gather 10 | import tensorboardX 11 | from tqdm import tqdm 12 | 13 | from lib.nn.parallel.data_parallel import DataParallel 14 | from lib.nn.optimizer import SGD 15 | from spml.config.default import config 16 | from spml.config.parse_args import parse_args 17 | import spml.utils.general.train as train_utils 18 | import spml.utils.general.vis as vis_utils 19 | import spml.utils.general.others as other_utils 20 | import spml.models.utils as model_utils 21 | from spml.data.datasets.list_tag_dataset import ListTagClassifierDataset 22 | from spml.models.embeddings.resnet_pspnet import resnet_50_pspnet, resnet_101_pspnet 23 | from spml.models.embeddings.resnet_deeplab import resnet_50_deeplab, resnet_101_deeplab 24 | from spml.models.predictions.softmax_classifier import softmax_classifier 25 | 26 | torch.cuda.manual_seed_all(235) 27 | torch.manual_seed(235) 28 | 29 | cudnn.enabled = True 30 | cudnn.benchmark = True 31 | 32 | 33 | def main(): 34 | """Training for softmax classifier only. 35 | """ 36 | # Retreve experiment configurations. 37 | args = parse_args('Training for softmax classifier only.') 38 | 39 | # Retrieve GPU informations. 40 | device_ids = [int(i) for i in config.gpus.split(',')] 41 | gpu_ids = [torch.device('cuda', i) for i in device_ids] 42 | num_gpus = len(gpu_ids) 43 | 44 | # Create logger and tensorboard writer. 45 | summary_writer = tensorboardX.SummaryWriter(logdir=args.snapshot_dir) 46 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 47 | 48 | model_path_template = os.path.join(args.snapshot_dir, 49 | 'model-{:d}.pth') 50 | optimizer_path_template = os.path.join(args.snapshot_dir, 51 | 'model-{:d}.state.pth') 52 | 53 | # Create data loaders. 54 | train_dataset = ListTagClassifierDataset( 55 | data_dir=args.data_dir, 56 | data_list=args.data_list, 57 | img_mean=config.network.pixel_means, 58 | img_std=config.network.pixel_stds, 59 | size=config.train.crop_size, 60 | random_crop=config.train.random_crop, 61 | random_scale=config.train.random_scale, 62 | random_mirror=config.train.random_mirror, 63 | random_grayscale=True, 64 | random_blur=True, 65 | training=True) 66 | 67 | train_loader = torch.utils.data.DataLoader( 68 | train_dataset, 69 | batch_size=config.train.batch_size, 70 | shuffle=config.train.shuffle, 71 | num_workers=num_gpus * config.num_threads, 72 | drop_last=False, 73 | collate_fn=train_dataset.collate_fn) 74 | 75 | # Create models. 76 | if config.network.backbone_types == 'panoptic_pspnet_101': 77 | embedding_model = resnet_101_pspnet(config).cuda() 78 | elif config.network.backbone_types == 'panoptic_deeplab_101': 79 | embedding_model = resnet_101_deeplab(config).cuda() 80 | else: 81 | raise ValueError('Not support ' + config.network.backbone_types) 82 | 83 | if config.network.prediction_types == 'softmax_classifier': 84 | prediction_model = softmax_classifier(config).cuda() 85 | else: 86 | raise ValueError('Not support ' + config.network.prediction_types) 87 | 88 | # Use customized optimizer and pass lr=1 to support different lr for 89 | # different weights. 90 | optimizer = SGD( 91 | embedding_model.get_params_lr() + prediction_model.get_params_lr(), 92 | lr=1, 93 | momentum=config.train.momentum, 94 | weight_decay=config.train.weight_decay) 95 | optimizer.zero_grad() 96 | 97 | # Load pre-trained weights. 
98 | curr_iter = config.train.begin_iteration 99 | if config.network.pretrained: 100 | print('Loading pre-trained model: {:s}'.format(config.network.pretrained)) 101 | embedding_model.load_state_dict( 102 | torch.load(config.network.pretrained)['embedding_model'], 103 | resume=True) 104 | else: 105 | raise ValueError('Pre-trained model is required.') 106 | 107 | # Distribute model weights to multi-gpus. 108 | embedding_model = DataParallel(embedding_model, 109 | device_ids=device_ids, 110 | gather_output=False) 111 | prediction_model = DataParallel(prediction_model, 112 | device_ids=device_ids, 113 | gather_output=False) 114 | 115 | embedding_model.eval() 116 | prediction_model.train() 117 | print(embedding_model) 118 | print(prediction_model) 119 | 120 | # Create memory bank. 121 | memory_banks = {} 122 | 123 | # start training 124 | train_iterator = train_loader.__iter__() 125 | iterator_index = 0 126 | pbar = tqdm(range(curr_iter, config.train.max_iteration)) 127 | for curr_iter in pbar: 128 | # Check if the rest of datas is enough to iterate through; 129 | # otherwise, re-initiate the data iterator. 130 | if iterator_index + num_gpus >= len(train_loader): 131 | train_iterator = train_loader.__iter__() 132 | iterator_index = 0 133 | 134 | # Feed-forward. 135 | image_batch, label_batch = other_utils.prepare_datas_and_labels_mgpu( 136 | train_iterator, gpu_ids) 137 | iterator_index += num_gpus 138 | 139 | # Generate embeddings, clustering and prototypes. 140 | with torch.no_grad(): 141 | embeddings = embedding_model(*zip(image_batch, label_batch)) 142 | 143 | # Compute loss. 144 | outputs = prediction_model(*zip(embeddings, label_batch)) 145 | outputs = scatter_gather.gather(outputs, gpu_ids[0]) 146 | losses = [] 147 | for k in ['sem_ann_loss']: 148 | loss = outputs.get(k, None) 149 | if loss is not None: 150 | outputs[k] = loss.mean() 151 | losses.append(outputs[k]) 152 | loss = sum(losses) 153 | acc = outputs['accuracy'].mean() 154 | 155 | # Backward propogation. 156 | if config.train.lr_policy == 'step': 157 | lr = train_utils.lr_step(config.train.base_lr, 158 | curr_iter, 159 | config.train.decay_iterations, 160 | config.train.warmup_iteration) 161 | else: 162 | lr = train_utils.lr_poly(config.train.base_lr, 163 | curr_iter, 164 | config.train.max_iteration, 165 | config.train.warmup_iteration) 166 | 167 | optimizer.zero_grad() 168 | loss.backward() 169 | optimizer.step(lr) 170 | 171 | # Snapshot the trained model. 172 | if ((curr_iter+1) % config.train.snapshot_step == 0 173 | or curr_iter == config.train.max_iteration - 1): 174 | model_state_dict = { 175 | 'embedding_model': embedding_model.module.state_dict(), 176 | 'prediction_model': prediction_model.module.state_dict()} 177 | torch.save(model_state_dict, 178 | model_path_template.format(curr_iter)) 179 | torch.save(optimizer.state_dict(), 180 | optimizer_path_template.format(curr_iter)) 181 | 182 | # Print loss in the progress bar. 183 | line = 'loss = {:.3f}, acc = {:.3f}, lr = {:.6f}'.format( 184 | loss.item(), acc.item(), lr) 185 | pbar.set_description(line) 186 | 187 | 188 | if __name__ == '__main__': 189 | main() 190 | -------------------------------------------------------------------------------- /pyscripts/inference/inference_softmax_crf_msc.py: -------------------------------------------------------------------------------- 1 | """Inference script for semantic segmentation by softmax classifier. 
2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | import math 6 | 7 | import PIL.Image as Image 8 | import numpy as np 9 | import cv2 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.backends.cudnn as cudnn 13 | from tqdm import tqdm 14 | 15 | import spml.data.transforms as transforms 16 | import spml.utils.general.vis as vis_utils 17 | import spml.utils.general.others as other_utils 18 | from spml.data.datasets.base_dataset import ListDataset 19 | from spml.config.default import config 20 | from spml.config.parse_args import parse_args 21 | from spml.models.embeddings.resnet_pspnet import resnet_50_pspnet, resnet_101_pspnet 22 | from spml.models.embeddings.resnet_deeplab import resnet_50_deeplab, resnet_101_deeplab 23 | from spml.models.predictions.softmax_classifier import softmax_classifier 24 | from spml.models.crf import DenseCRF 25 | 26 | cudnn.enabled = True 27 | cudnn.benchmark = True 28 | 29 | 30 | def main(): 31 | """Inference for semantic segmentation. 32 | """ 33 | # Retreve experiment configurations. 34 | args = parse_args('Inference for semantic segmentation.') 35 | 36 | # Create directories to save results. 37 | semantic_dir = os.path.join(args.save_dir, 'semantic_gray') 38 | semantic_rgb_dir = os.path.join(args.save_dir, 'semantic_color') 39 | os.makedirs(semantic_dir, exist_ok=True) 40 | os.makedirs(semantic_rgb_dir, exist_ok=True) 41 | 42 | # Create color map. 43 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 44 | color_map = color_map.numpy() 45 | 46 | # Create data loaders. 47 | test_dataset = ListDataset( 48 | data_dir=args.data_dir, 49 | data_list=args.data_list, 50 | img_mean=config.network.pixel_means, 51 | img_std=config.network.pixel_stds, 52 | size=None, 53 | random_crop=False, 54 | random_scale=False, 55 | random_mirror=False, 56 | training=False) 57 | test_image_paths = test_dataset.image_paths 58 | 59 | # Create models. 60 | if config.network.backbone_types == 'panoptic_pspnet_101': 61 | embedding_model = resnet_101_pspnet(config).cuda() 62 | elif config.network.backbone_types == 'panoptic_deeplab_101': 63 | embedding_model = resnet_101_deeplab(config).cuda() 64 | else: 65 | raise ValueError('Not support ' + config.network.backbone_types) 66 | 67 | prediction_model = softmax_classifier(config).cuda() 68 | embedding_model.eval() 69 | prediction_model.eval() 70 | 71 | # Load trained weights. 72 | model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth') 73 | save_iter = config.train.max_iteration - 1 74 | embedding_model.load_state_dict( 75 | torch.load(model_path_template.format(save_iter))['embedding_model'], 76 | resume=True) 77 | prediction_model.load_state_dict( 78 | torch.load(model_path_template.format(save_iter))['prediction_model']) 79 | 80 | # Define CRF. 81 | postprocessor = DenseCRF( 82 | iter_max=args.crf_iter_max, 83 | pos_xy_std=args.crf_pos_xy_std, 84 | pos_w=args.crf_pos_w, 85 | bi_xy_std=args.crf_bi_xy_std, 86 | bi_rgb_std=args.crf_bi_rgb_std, 87 | bi_w=args.crf_bi_w,) 88 | 89 | 90 | # Start inferencing. 91 | with torch.no_grad(): 92 | for data_index in tqdm(range(len(test_dataset))): 93 | # Image path. 94 | image_path = test_image_paths[data_index] 95 | base_name = os.path.basename(image_path).replace('.jpg', '.png') 96 | 97 | # Image resolution. 
98 | original_image_batch, original_label_batch, _ = test_dataset[data_index] 99 | image_h, image_w = original_image_batch['image'].shape[-2:] 100 | batches = other_utils.create_image_pyramid( 101 | original_image_batch, original_label_batch, 102 | scales=[0.5, 0.75, 1, 1.25, 1.5], 103 | is_flip=True) 104 | 105 | semantic_logits = [] 106 | for image_batch, label_batch, data_info in batches: 107 | resize_image_h, resize_image_w = image_batch['image'].shape[-2:] 108 | # Crop and Pad the input image. 109 | image_batch['image'] = transforms.resize_with_pad( 110 | image_batch['image'].transpose(1, 2, 0), 111 | config.test.crop_size, 112 | image_pad_value=0).transpose(2, 0, 1) 113 | image_batch['image'] = torch.FloatTensor( 114 | image_batch['image'][np.newaxis, ...]).cuda() 115 | pad_image_h, pad_image_w = image_batch['image'].shape[-2:] 116 | 117 | # Create the ending index of each patch. 118 | stride_h, stride_w = config.test.stride 119 | crop_h, crop_w = config.test.crop_size 120 | npatches_h = math.ceil(1.0 * (pad_image_h-crop_h) / stride_h) + 1 121 | npatches_w = math.ceil(1.0 * (pad_image_w-crop_w) / stride_w) + 1 122 | patch_ind_h = np.linspace( 123 | crop_h, pad_image_h, npatches_h, dtype=np.int32) 124 | patch_ind_w = np.linspace( 125 | crop_w, pad_image_w, npatches_w, dtype=np.int32) 126 | 127 | # Create place holder for full-resolution embeddings. 128 | semantic_logit = torch.FloatTensor( 129 | 1, config.dataset.num_classes, pad_image_h, pad_image_w).zero_().to("cuda:0") 130 | counts = torch.FloatTensor( 131 | 1, 1, pad_image_h, pad_image_w).zero_().to("cuda:0") 132 | for ind_h in patch_ind_h: 133 | for ind_w in patch_ind_w: 134 | sh, eh = ind_h - crop_h, ind_h 135 | sw, ew = ind_w - crop_w, ind_w 136 | crop_image_batch = { 137 | k: v[:, :, sh:eh, sw:ew] for k, v in image_batch.items()} 138 | 139 | # Feed-forward. 140 | crop_embeddings = embedding_model( 141 | crop_image_batch, resize_as_input=True) 142 | crop_outputs = prediction_model(crop_embeddings) 143 | semantic_logit[..., sh:eh, sw:ew] += crop_outputs['semantic_logit'].to("cuda:0") 144 | counts[..., sh:eh, sw:ew] += 1 145 | semantic_logit /= counts 146 | semantic_logit = semantic_logit[..., :resize_image_h, :resize_image_w] 147 | semantic_logit = F.interpolate( 148 | semantic_logit, size=(image_h, image_w), mode='bilinear') 149 | semantic_logit = F.softmax(semantic_logit, dim=1) 150 | semantic_logit = semantic_logit.data.cpu().numpy().astype(np.float32) 151 | if data_info['is_flip']: 152 | semantic_logit = semantic_logit[..., ::-1] 153 | semantic_logits.append(semantic_logit) 154 | 155 | semantic_logits = np.concatenate(semantic_logits, axis=0) 156 | semantic_prob = np.mean(semantic_logits, axis=0) 157 | 158 | # DenseCRF post-processing. 159 | image = original_image_batch['image'].astype(np.float32) 160 | image = image.transpose(1, 2, 0) 161 | image *= np.reshape(config.network.pixel_stds, (1, 1, 3)) 162 | image += np.reshape(config.network.pixel_means, (1, 1, 3)) 163 | image = image * 255 164 | image = image.astype(np.uint8) 165 | 166 | semantic_prob = postprocessor(image, semantic_prob) 167 | 168 | semantic_pred = np.argmax(semantic_prob, axis=0).astype(np.uint8) 169 | 170 | # Save semantic predictions. 
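# At this point `semantic_pred` is already at the original (image_h, image_w)
# resolution: the fused logits were interpolated per scale and the CRF ran on
# the full-size image, so no final cv2.resize is needed here, unlike in the
# single-scale script.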
171 | semantic_pred_name = os.path.join(semantic_dir, base_name) 172 | Image.fromarray(semantic_pred, mode='L').save(semantic_pred_name) 173 | 174 | semantic_pred_rgb = color_map[semantic_pred] 175 | semantic_pred_rgb_name = os.path.join(semantic_rgb_dir, base_name) 176 | Image.fromarray(semantic_pred_rgb, mode='RGB').save( 177 | semantic_pred_rgb_name) 178 | 179 | 180 | if __name__ == '__main__': 181 | main() 182 | -------------------------------------------------------------------------------- /spml/data/datasets/densepose_dataset.py: -------------------------------------------------------------------------------- 1 | """Classes for DensePose dataset. 2 | """ 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | from spml.data.datasets.base_dataset import ListDataset 8 | import spml.data.transforms as transforms 9 | 10 | 11 | class DenseposeDataset(ListDataset): 12 | """Class of Densepose dataset which takes a file of paired list of 13 | images and labels for Densepose. 14 | """ 15 | def __init__(self, 16 | data_dir, 17 | data_list, 18 | img_mean=(0, 0, 0), 19 | img_std=(1, 1, 1), 20 | size=None, 21 | random_crop=False, 22 | random_scale=False, 23 | random_mirror=False, 24 | training=False): 25 | """Base class for Denspose Dataset. 26 | 27 | Args: 28 | data_dir: A string indicates root directory of images and labels. 29 | data_list: A list of strings which indicate path of paired images 30 | and labels. 'image_path semantic_label_path instance_label_path'. 31 | img_mean: A list of scalars indicate the mean image value per channel. 32 | img_std: A list of scalars indicate the std image value per channel. 33 | size: A tuple of scalars indicate size of output image and labels. 34 | The output resolution remain the same if `size` is None. 35 | random_crop: enable/disable random_crop for data augmentation. 36 | If True, adopt randomly cropping as augmentation. 37 | random_scale: enable/disable random_scale for data augmentation. 38 | If True, adopt adopt randomly scaling as augmentation. 39 | random_mirror: enable/disable random_mirror for data augmentation. 40 | If True, adopt adopt randomly mirroring as augmentation. 41 | training: enable/disable training to set dataset for training and 42 | testing. If True, set to training mode. 43 | """ 44 | super(DenseposeDataset, self).__init__( 45 | data_dir, 46 | data_list, 47 | img_mean, 48 | img_std, 49 | size, 50 | random_crop, 51 | random_scale, 52 | random_mirror, 53 | training) 54 | 55 | self.part_labels = { 56 | 0: 'background', 57 | 1: 'torso', 58 | 2: 'right hand', 59 | 3: 'left hand', 60 | 4: 'left foot', 61 | 5: 'right foot', 62 | 6: 'right thigh', 63 | 7: 'left thigh', 64 | 8: 'right leg', 65 | 9: 'left leg', 66 | 10: 'left arm', 67 | 11: 'right arm', 68 | 12: 'left forearm', 69 | 13: 'right forearm', 70 | 14: 'head' 71 | } 72 | 73 | # Remapping part labels (for horizontally flipping). 74 | self.part_label_remap = np.arange(256, dtype=np.uint8) 75 | self.part_label_remap[:15] = ( 76 | [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]) 77 | 78 | def _training_preprocess(self, idx): 79 | """Data preprocessing for training. 80 | """ 81 | assert(self.size is not None) 82 | image, semantic_label, instance_label = self._get_datas_by_index(idx) 83 | 84 | label = np.stack([semantic_label, instance_label], axis=2) 85 | 86 | # The part label should be remapped after mirroring. 87 | if self.random_mirror: 88 | is_flip = np.random.uniform(0, 1.0) >= 0.5 89 | if is_flip: 90 | image = image[:, ::-1, ...] 91 | label = label[:, ::-1, ...] 
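# After a horizontal flip, left and right body parts swap sides, so the
# semantic channel is passed through `part_label_remap`: e.g. 2 ('right
# hand') <-> 3 ('left hand') and 12 ('left forearm') <-> 13 ('right
# forearm'); background (0), torso (1) and head (14) map to themselves.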
92 | label[..., 0] = self.part_label_remap[label[..., 0]] 93 | 94 | if self.random_scale: 95 | image, label = transforms.random_resize(image, label, 0.5, 1.5) 96 | 97 | if self.random_crop: 98 | image, label = transforms.random_crop_with_pad( 99 | image, label, self.size, self.img_mean, 255) 100 | 101 | semantic_label, instance_label = label[..., 0], label[..., 1] 102 | 103 | return image, semantic_label, instance_label 104 | 105 | 106 | class DenseposeClassifierDataset(DenseposeDataset): 107 | 108 | def __init__(self, 109 | data_dir, 110 | data_list, 111 | img_mean=(0, 0, 0), 112 | img_std=(1, 1, 1), 113 | size=None, 114 | random_crop=False, 115 | random_scale=False, 116 | random_mirror=False, 117 | random_grayscale=False, 118 | random_blur=False, 119 | training=False): 120 | """Class of Densepose Dataset for training softmax classifier, 121 | where we introduce more data augmentation. 122 | 123 | Args: 124 | data_dir: A string indicates root directory of images and labels. 125 | data_list: A list of strings which indicate path of paired images 126 | and labels. 'image_path semantic_label_path instance_label_path'. 127 | img_mean: A list of scalars indicate the mean image value per channel. 128 | img_std: A list of scalars indicate the std image value per channel. 129 | size: A tuple of scalars indicate size of output image and labels. 130 | The output resolution remain the same if `size` is None. 131 | random_crop: enable/disable random_crop for data augmentation. 132 | If True, adopt randomly cropping as augmentation. 133 | random_scale: enable/disable random_scale for data augmentation. 134 | If True, adopt randomly scaling as augmentation. 135 | random_mirror: enable/disable random_mirror for data augmentation. 136 | If True, adopt randomly mirroring as augmentation. 137 | random_grayscale: enable/disable random_grayscale for data augmentation. 138 | If True, adopt randomly converting RGB to grayscale as augmentation. 139 | random_blur: enable/disable random_blur for data augmentation. 140 | If True, adopt randomly applying Gaussian blur as augmentation. 141 | training: enable/disable training to set dataset for training and 142 | testing. If True, set to training mode. 143 | """ 144 | super(DenseposeClassifierDataset, self).__init__( 145 | data_dir, 146 | data_list, 147 | img_mean, 148 | img_std, 149 | size, 150 | random_crop, 151 | random_scale, 152 | random_mirror, 153 | training) 154 | self.random_grayscale = random_grayscale 155 | self.random_blur = random_blur 156 | 157 | def _training_preprocess(self, idx): 158 | """Data preprocessing for training. 159 | """ 160 | assert(self.size is not None) 161 | image, semantic_label, instance_label = self._get_datas_by_index(idx) 162 | 163 | label = np.stack([semantic_label, instance_label], axis=2) 164 | 165 | # The part label should be changed accordingly. 166 | if self.random_mirror: 167 | is_flip = np.random.uniform(0, 1.0) >= 0.5 168 | if is_flip: 169 | image = image[:, ::-1, ...] 170 | label = label[:, ::-1, ...] 171 | label[..., 0] = self.part_label_remap[label[..., 0]] 172 | 173 | if self.random_scale: 174 | image, label = transforms.random_resize(image, label, 0.5, 2.0) 175 | 176 | if self.random_crop: 177 | image, label = transforms.random_crop_with_pad( 178 | image, label, self.size, self.img_mean, 255) 179 | 180 | # Randomly convert RGB to grayscale. 
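# Sketch of the conversion below: with luma weights (0.3, 0.59, 0.11), a pure
# red pixel (255, 0, 0) becomes 0.3 * 255 = 76.5 on every channel, and the
# single-channel result is tiled back to 3 channels so the input shape is
# unchanged. The 0.3 threshold grayscales roughly 30% of training samples.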
181 | if self.random_grayscale and np.random.uniform(0, 1.0) < 0.3: 182 | rgb2gray = np.array([0.3, 0.59, 0.11], dtype=np.float32) 183 | image = np.sum( 184 | image * np.reshape(rgb2gray, (1, 1, 3)), axis=-1, keepdims=True) 185 | image = np.tile(image, (1,1,3)) 186 | 187 | # Randomly apply Gaussian blur. 188 | if self.random_blur and np.random.uniform(0, 1.0) < 0.5: 189 | sigma = np.random.uniform(0.1, 5) 190 | w_x, w_y = np.meshgrid(np.linspace(-2, 2, 5), np.linspace(-2, 2, 5)) 191 | weight = np.exp(- (w_x ** 2 + w_y ** 2) / sigma**2) 192 | weight = weight / weight.sum() 193 | image = cv2.filter2D(image, -1, weight) 194 | 195 | 196 | semantic_label, instance_label = label[..., 0], label[..., 1] 197 | 198 | return image, semantic_label, instance_label 199 | 200 | -------------------------------------------------------------------------------- /pyscripts/inference/inference_softmax_crf.py: -------------------------------------------------------------------------------- 1 | """Inference script for semantic segmentation by softmax classifier. 2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | import math 6 | 7 | import PIL.Image as Image 8 | import numpy as np 9 | import cv2 10 | import torch 11 | import torch.backends.cudnn as cudnn 12 | import torch.nn.functional as F 13 | 14 | import spml.data.transforms as transforms 15 | import spml.utils.general.vis as vis_utils 16 | from spml.data.datasets.base_dataset import ListDataset 17 | from spml.config.default import config 18 | from spml.config.parse_args import parse_args 19 | from spml.models.embeddings.resnet_pspnet import resnet_50_pspnet, resnet_101_pspnet 20 | from spml.models.embeddings.resnet_deeplab import resnet_50_deeplab, resnet_101_deeplab 21 | from spml.models.predictions.softmax_classifier import softmax_classifier 22 | from spml.models.crf import DenseCRF 23 | 24 | cudnn.enabled = True 25 | cudnn.benchmark = True 26 | 27 | 28 | def main(): 29 | """Inference for semantic segmentation. 30 | """ 31 | # Retreve experiment configurations. 32 | args = parse_args('Inference for semantic segmentation.') 33 | 34 | # Create directories to save results. 35 | semantic_dir = os.path.join(args.save_dir, 'semantic_gray') 36 | semantic_rgb_dir = os.path.join(args.save_dir, 'semantic_color') 37 | if not os.path.isdir(semantic_dir): 38 | os.makedirs(semantic_dir) 39 | if not os.path.isdir(semantic_rgb_dir): 40 | os.makedirs(semantic_rgb_dir) 41 | 42 | # Create color map. 43 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 44 | color_map = color_map.numpy() 45 | 46 | # Create data loaders. 47 | test_dataset = ListDataset( 48 | data_dir=args.data_dir, 49 | data_list=args.data_list, 50 | img_mean=config.network.pixel_means, 51 | img_std=config.network.pixel_stds, 52 | size=None, 53 | random_crop=False, 54 | random_scale=False, 55 | random_mirror=False, 56 | training=False) 57 | test_image_paths = test_dataset.image_paths 58 | 59 | # Create models. 60 | if config.network.backbone_types == 'panoptic_pspnet_101': 61 | embedding_model = resnet_101_pspnet(config).cuda() 62 | elif config.network.backbone_types == 'panoptic_deeplab_101': 63 | embedding_model = resnet_101_deeplab(config).cuda() 64 | else: 65 | raise ValueError('Not support ' + config.network.backbone_types) 66 | 67 | prediction_model = softmax_classifier(config).cuda() 68 | embedding_model.eval() 69 | prediction_model.eval() 70 | 71 | # Load trained weights. 
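# Inference always restores the final training snapshot,
# model-(max_iteration - 1).pth, matching the naming used by the training
# scripts' snapshot step.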
72 | model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth') 73 | save_iter = config.train.max_iteration - 1 74 | checkpoint = torch.load(model_path_template.format(save_iter)) 75 | embedding_model.load_state_dict( 76 | checkpoint['embedding_model'], resume=True) 77 | prediction_model.load_state_dict( 78 | checkpoint['prediction_model']) 79 | 80 | # Define CRF. 81 | postprocessor = DenseCRF( 82 | iter_max=args.crf_iter_max, 83 | pos_xy_std=args.crf_pos_xy_std, 84 | pos_w=args.crf_pos_w, 85 | bi_xy_std=args.crf_bi_xy_std, 86 | bi_rgb_std=args.crf_bi_rgb_std, 87 | bi_w=args.crf_bi_w,) 88 | 89 | 90 | # Start inference. 91 | for data_index in range(len(test_dataset)): 92 | # Image path. 93 | image_path = test_image_paths[data_index] 94 | base_name = os.path.basename(image_path).replace('.jpg', '.png') 95 | 96 | # Image resolution. 97 | image_batch, _, _ = test_dataset[data_index] 98 | image_h, image_w = image_batch['image'].shape[-2:] 99 | 100 | # Resize the input image. 101 | if config.test.image_size > 0: 102 | image_batch['image'] = transforms.resize_with_interpolation( 103 | image_batch['image'].transpose(1, 2, 0), 104 | config.test.image_size, 105 | method='bilinear').transpose(2, 0, 1) 106 | resize_image_h, resize_image_w = image_batch['image'].shape[-2:] 107 | 108 | # Crop and pad the input image. 109 | image_batch['image'] = transforms.resize_with_pad( 110 | image_batch['image'].transpose(1, 2, 0), 111 | config.test.crop_size, 112 | image_pad_value=0).transpose(2, 0, 1) 113 | image_batch['image'] = torch.FloatTensor(image_batch['image'][np.newaxis, ...]).cuda() 114 | pad_image_h, pad_image_w = image_batch['image'].shape[-2:] 115 | 116 | # Create the ending index of each patch. 117 | stride_h, stride_w = config.test.stride 118 | crop_h, crop_w = config.test.crop_size 119 | npatches_h = math.ceil(1.0 * (pad_image_h-crop_h) / stride_h) + 1 120 | npatches_w = math.ceil(1.0 * (pad_image_w-crop_w) / stride_w) + 1 121 | patch_ind_h = np.linspace( 122 | crop_h, pad_image_h, npatches_h, dtype=np.int32) 123 | patch_ind_w = np.linspace( 124 | crop_w, pad_image_w, npatches_w, dtype=np.int32) 125 | 126 | # Create placeholder for full-resolution outputs. 127 | outputs = {} 128 | with torch.no_grad(): 129 | for ind_h in patch_ind_h: 130 | for ind_w in patch_ind_w: 131 | sh, eh = ind_h - crop_h, ind_h 132 | sw, ew = ind_w - crop_w, ind_w 133 | crop_image_batch = { 134 | k: v[:, :, sh:eh, sw:ew] for k, v in image_batch.items()} 135 | 136 | # Feed-forward. 137 | crop_embeddings = embedding_model( 138 | crop_image_batch, resize_as_input=True) 139 | crop_outputs = prediction_model(crop_embeddings) 140 | 141 | for name, crop_out in crop_outputs.items(): 142 | 143 | if crop_out is not None: 144 | if name not in outputs.keys(): 145 | output_shape = list(crop_out.shape) 146 | output_shape[-2:] = pad_image_h, pad_image_w 147 | outputs[name] = torch.zeros(output_shape, dtype=crop_out.dtype).cuda() 148 | outputs[name][..., sh:eh, sw:ew] += crop_out 149 | 150 | # Save semantic predictions. 151 | semantic_logits = outputs.get('semantic_logit', None) 152 | if semantic_logits is not None: 153 | semantic_prob = F.softmax(semantic_logits, dim=1) 154 | semantic_prob = semantic_prob[0, :, :resize_image_h, :resize_image_w] 155 | semantic_prob = semantic_prob.data.cpu().numpy().astype(np.float32) 156 | 157 | # DenseCRF post-processing.
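# Note: DenseCRF expects the raw RGB image, so the block below undoes the
# dataset normalization before pairing the image with the class
# probabilities. For a pixel x with per-channel mean m and std s, the
# dataset applies x_norm = (x / 255 - m) / s, so the inverse used here is
# x = (x_norm * s + m) * 255, cast back to uint8.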
158 | image = image_batch['image'][0].data.cpu().numpy().astype(np.float32) 159 | image = image.transpose(1, 2, 0) 160 | image *= np.reshape(config.network.pixel_stds, (1, 1, 3)) 161 | image += np.reshape(config.network.pixel_means, (1, 1, 3)) 162 | image = image * 255 163 | image = image.astype(np.uint8) 164 | image = image[:resize_image_h, :resize_image_w, :] 165 | 166 | semantic_prob = postprocessor(image, semantic_prob) 167 | 168 | #semantic_pred = torch.argmax(semantic_logits, 1) 169 | semantic_pred = np.argmax(semantic_prob, axis=0).astype(np.uint8) 170 | #semantic_pred = (semantic_pred.view(pad_image_h, pad_image_w) 171 | # .cpu() 172 | # .data 173 | # .numpy() 174 | # .astype(np.uint8)) 175 | #semantic_pred = semantic_pred[:resize_image_h, :resize_image_w] 176 | semantic_pred = cv2.resize( 177 | semantic_pred, 178 | (image_w, image_h), 179 | interpolation=cv2.INTER_NEAREST) 180 | 181 | semantic_pred_name = os.path.join(semantic_dir, base_name) 182 | Image.fromarray(semantic_pred, mode='L').save(semantic_pred_name) 183 | 184 | semantic_pred_rgb = color_map[semantic_pred] 185 | semantic_pred_rgb_name = os.path.join(semantic_rgb_dir, base_name) 186 | Image.fromarray(semantic_pred_rgb, mode='RGB').save( 187 | semantic_pred_rgb_name) 188 | 189 | # Clean GPU memory cache to save more space. 190 | outputs = {} 191 | crop_embeddings = {} 192 | crop_outputs = {} 193 | torch.cuda.empty_cache() 194 | 195 | 196 | if __name__ == '__main__': 197 | main() 198 | -------------------------------------------------------------------------------- /spml/data/datasets/base_dataset.py: -------------------------------------------------------------------------------- 1 | """Base classes for Dataset. 2 | """ 3 | 4 | import os 5 | 6 | import torch 7 | import torch.utils.data 8 | import numpy as np 9 | import PIL.Image as Image 10 | import cv2 11 | 12 | import spml.data.transforms as transforms 13 | 14 | 15 | class ListDataset(torch.utils.data.Dataset): 16 | """Base dataset class which takes a file of paired lists of 17 | images, semantic labels and instance labels. 18 | """ 19 | 20 | def __init__(self, 21 | data_dir, 22 | data_list, 23 | img_mean=(0, 0, 0), 24 | img_std=(1, 1, 1), 25 | size=None, 26 | random_crop=False, 27 | random_scale=False, 28 | random_mirror=False, 29 | training=False): 30 | """Base class for Dataset. 31 | 32 | Args: 33 | data_dir: A string indicating the root directory of images and labels. 34 | data_list: A list of strings indicating paths of paired images 35 | and labels: 'image_path semantic_label_path instance_label_path'. 36 | img_mean: A list of scalars indicating the mean image value per channel. 37 | img_std: A list of scalars indicating the std image value per channel. 38 | size: A tuple of scalars indicating the size of output image and labels. 39 | The output resolution remains the same if `size` is None. 40 | random_crop: enable/disable random_crop for data augmentation. 41 | If True, adopt random cropping as augmentation. 42 | random_scale: enable/disable random_scale for data augmentation. 43 | If True, adopt random scaling as augmentation. 44 | random_mirror: enable/disable random_mirror for data augmentation. 45 | If True, adopt random mirroring as augmentation. 46 | training: enable/disable training mode. If True, set the dataset 47 | to training mode.
48 | """ 49 | self.image_paths, self.semantic_label_paths, self.instance_label_paths = ( 50 | self._read_image_and_label_paths(data_dir, data_list)) 51 | 52 | self.training = training 53 | self.img_mean = img_mean 54 | self.img_std = img_std 55 | self.size = size 56 | self.random_crop = random_crop 57 | self.random_scale = random_scale 58 | self.random_mirror = random_mirror 59 | 60 | def eval(self): 61 | """Set the dataset to evaluation mode. 62 | """ 63 | self.training = False 64 | 65 | def train(self): 66 | """Set the dataset to training mode. 67 | """ 68 | self.training = True 69 | 70 | def _read_image_and_label_paths(self, data_dir, data_list): 71 | """Parse strings into lists of image, semantic label and 72 | instance label paths. 73 | 74 | Args: 75 | data_dir: A string indicates root directory of images and labels. 76 | data_list: A list of strings which indicate path of paired images 77 | and labels. 'image_path semantic_label_path instance_label_path'. 78 | 79 | Return: 80 | Threee lists of file paths. 81 | """ 82 | images, semantic_labels, instance_labels = [], [], [] 83 | with open(data_list, 'r') as list_file: 84 | for line in list_file: 85 | line = line.strip('\n') 86 | try: 87 | img, semantic_lab, instance_lab = line.split(' ') 88 | except: 89 | img = line 90 | semantic_lab = instance_lab = None 91 | 92 | images.append(os.path.join(data_dir, img)) 93 | 94 | if semantic_lab is not None: 95 | semantic_labels.append(os.path.join(data_dir, semantic_lab)) 96 | 97 | if instance_lab is not None: 98 | instance_labels.append(os.path.join(data_dir, instance_lab)) 99 | 100 | return images, semantic_labels, instance_labels 101 | 102 | def _read_image(self, image_path): 103 | """Read BGR uint8 image. 104 | """ 105 | img = np.array(Image.open(image_path).convert(mode='RGB')) 106 | img = img.astype(np.float32) / 255 107 | return img 108 | 109 | def _read_label(self, label_path): 110 | """Read uint8 label. 111 | """ 112 | return np.array(Image.open(label_path).convert(mode='L')) 113 | 114 | def _get_datas_by_index(self, idx): 115 | """Return image_path, semantic_label_path, instance_label_path 116 | by the given index. 117 | """ 118 | image_path = self.image_paths[idx] 119 | image = self._read_image(image_path) 120 | 121 | if len(self.semantic_label_paths) > 0: 122 | semantic_label_path = self.semantic_label_paths[idx] 123 | semantic_label = self._read_label(semantic_label_path) 124 | else: 125 | semantic_label = None 126 | 127 | if len(self.instance_label_paths) > 0: 128 | instance_label_path = self.instance_label_paths[idx] 129 | instance_label = self._read_label(instance_label_path) 130 | else: 131 | instance_label = None 132 | 133 | return image, semantic_label, instance_label 134 | 135 | def _training_preprocess(self, idx): 136 | """Data preprocessing for training. 
137 | """ 138 | assert(self.size is not None) 139 | image, semantic_label, instance_label = self._get_datas_by_index(idx) 140 | 141 | label = np.stack([semantic_label, instance_label], axis=2) 142 | 143 | if self.random_mirror: 144 | image, label = transforms.random_mirror(image, label) 145 | 146 | if self.random_scale: 147 | image, label = transforms.random_resize(image, label, 0.5, 1.5) 148 | 149 | if self.random_crop: 150 | image, label = transforms.random_crop_with_pad( 151 | image, label, self.size, self.img_mean, 255) 152 | 153 | semantic_label, instance_label = label[..., 0], label[..., 1] 154 | 155 | return image, semantic_label, instance_label 156 | 157 | def _eval_preprocess(self, idx): 158 | """Data preprocessing for evaluationg. 159 | """ 160 | image, semantic_label, instance_label = self._get_datas_by_index(idx) 161 | 162 | if self.size is not None: 163 | image = transforms.resize_with_pad( 164 | image, self.size, self.img_mean) 165 | 166 | image = image[:self.size[0], :self.size[1], ...] 167 | 168 | return image, semantic_label, instance_label 169 | 170 | def __len__(self): 171 | """Total number of datas in the dataset. 172 | """ 173 | return len(self.image_paths) 174 | 175 | def __getitem__(self, idx): 176 | """Retrive image and label by index. 177 | """ 178 | if self.training: 179 | image, semantic_label, instance_label = self._training_preprocess(idx) 180 | else: 181 | image, semantic_label, instance_label = self._eval_preprocess(idx) 182 | 183 | image = image - np.array(self.img_mean, dtype=image.dtype) 184 | image = image / np.array(self.img_std, dtype=image.dtype) 185 | 186 | inputs = {'image': image.transpose(2, 0, 1)} 187 | labels = {'semantic_label': semantic_label, 188 | 'instance_label': instance_label} 189 | 190 | return inputs, labels, idx 191 | 192 | def _collate_fn_dict_list(self, dict_list): 193 | """Helper function to collate a list of dictionaries. 194 | """ 195 | outputs = {} 196 | for key in dict_list[0].keys(): 197 | values = [d[key] for d in dict_list] 198 | if values[0] is None: 199 | values = None 200 | elif (values[0].dtype == np.uint8 201 | or values[0].dtype == np.int32 202 | or values[0].dtype == np.int64): 203 | values = torch.LongTensor(values) 204 | elif (values[0].dtype == np.float32 205 | or values[0].dtype == np.float64): 206 | values = torch.FloatTensor(values) 207 | else: 208 | raise ValueError('Unsupported data type') 209 | 210 | outputs[key] = values 211 | 212 | return outputs 213 | 214 | def collate_fn(self, batch): 215 | """Customized collate function to group datas into batch. 216 | """ 217 | images, labels, indices = zip(*batch) 218 | 219 | images = self._collate_fn_dict_list(images) 220 | labels = self._collate_fn_dict_list(labels) 221 | indices = torch.LongTensor(indices) 222 | 223 | return images, labels, indices 224 | -------------------------------------------------------------------------------- /lib/nn/optimizer.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------- 2 | # Unified Panoptic Segmentation Network 3 | # 4 | # Copyright (c) 2018-2019 Uber Technologies, Inc. 5 | # 6 | # Licensed under the Uber Non-Commercial License (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at the root directory of this project. 9 | # 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | # --------------------------------------------------------------------------- 13 | 14 | import math 15 | import torch 16 | from torch.optim.optimizer import Optimizer, required 17 | 18 | class SGD(Optimizer): 19 | r"""Implements stochastic gradient descent (optionally with momentum). 20 | Nesterov momentum is based on the formula from 21 | `On the importance of initialization and momentum in deep learning`__. 22 | Args: 23 | params (iterable): iterable of parameters to optimize or dicts defining 24 | parameter groups 25 | lr (float): learning rate 26 | momentum (float, optional): momentum factor (default: 0) 27 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 28 | dampening (float, optional): dampening for momentum (default: 0) 29 | nesterov (bool, optional): enables Nesterov momentum (default: False) 30 | Example: 31 | >>> optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9) 32 | >>> optimizer.zero_grad() 33 | >>> loss_fn(model(input), target).backward() 34 | >>> optimizer.step(1.0) 35 | __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf 36 | .. note:: 37 | The implementation of SGD with Momentum/Nesterov subtly differs from 38 | Sutskever et. al. and implementations in some other frameworks. 39 | Considering the specific case of Momentum, the update can be written as 40 | .. math:: 41 | v = \rho * v + g \\ 42 | p = p - lr * v 43 | where p, g, v and :math:`\rho` denote the parameters, gradient, 44 | velocity, and momentum respectively. 45 | This is in contrast to Sutskever et. al. and 46 | other frameworks which employ an update of the form 47 | .. math:: 48 | v = \rho * v + lr * g \\ 49 | p = p - v 50 | The Nesterov version is analogously modified. 51 | """ 52 | 53 | def __init__(self, params, lr=required, momentum=0, dampening=0, 54 | weight_decay=0, nesterov=False): 55 | defaults = dict(lr=lr, momentum=momentum, dampening=dampening, 56 | weight_decay=weight_decay, nesterov=nesterov) 57 | if nesterov and (momentum <= 0 or dampening != 0): 58 | raise ValueError("Nesterov momentum requires a momentum and zero dampening") 59 | assert dampening == 0, "not implemented" 60 | super(SGD, self).__init__(params, defaults) 61 | 62 | def __setstate__(self, state): 63 | super(SGD, self).__setstate__(state) 64 | for group in self.param_groups: 65 | group.setdefault('nesterov', False) 66 | 67 | def step(self, lr, closure=None): 68 | """Performs a single optimization step. 69 | Arguments: 70 | lr (float): multiplier applied on top of each group's learning rate. 71 | closure (callable, optional): a closure that reevaluates the model and returns the loss. 72 | """ 73 | loss = None 74 | if closure is not None: 75 | loss = closure() 76 | 77 | for group in self.param_groups: 78 | weight_decay = group['weight_decay'] 79 | momentum = group['momentum'] 80 | nesterov = group['nesterov'] 81 | 82 | for p in group['params']: 83 | if p.grad is None: 84 | continue 85 | d_p = p.grad.data 86 | if weight_decay != 0: 87 | d_p.add_(weight_decay, p.data) 88 | if momentum != 0: 89 | param_state = self.state[p] 90 | if 'momentum_buffer' not in param_state: 91 | buf = param_state['momentum_buffer'] = ( 92 | p.data.new().resize_as_(p.data).zero_()) 93 | else: 94 | buf = param_state['momentum_buffer'] 95 | buf.mul_(momentum).add_(group['lr'] * lr, d_p) 96 | if nesterov: 97 | d_p = d_p.add(momentum, buf) 98 | else: 99 | d_p = buf 100 | else: 101 | d_p = d_p.mul(group['lr'] * lr) 102 | p.data.add_(-1, d_p) 103 | 104 | return loss 105 | 106 | 107 | class Adam(Optimizer): 108 | """Implements Adam algorithm.
109 | 110 | It has been proposed in `Adam: A Method for Stochastic Optimization`_. 111 | 112 | Arguments: 113 | params (iterable): iterable of parameters to optimize or dicts defining 114 | parameter groups 115 | lr (float, optional): learning rate (default: 1e-3) 116 | betas (Tuple[float, float], optional): coefficients used for computing 117 | running averages of gradient and its square (default: (0.9, 0.999)) 118 | eps (float, optional): term added to the denominator to improve 119 | numerical stability (default: 1e-8) 120 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 121 | 122 | .. _Adam\: A Method for Stochastic Optimization: 123 | https://arxiv.org/abs/1412.6980 124 | """ 125 | 126 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 127 | weight_decay=0): 128 | defaults = dict(lr=lr, betas=betas, eps=eps, 129 | weight_decay=weight_decay) 130 | super(Adam, self).__init__(params, defaults) 131 | 132 | def step(self, lr, closure=None): 133 | """Performs a single optimization step. 134 | Arguments: 135 | lr (float): multiplier applied on top of each group's learning rate. 136 | closure (callable, optional): A closure that reevaluates the model 137 | and returns the loss. 138 | """ 139 | loss = None 140 | if closure is not None: 141 | loss = closure() 142 | 143 | for group in self.param_groups: 144 | for p in group['params']: 145 | if p.grad is None: 146 | continue 147 | grad = p.grad.data 148 | state = self.state[p] 149 | 150 | # State initialization 151 | if len(state) == 0: 152 | state['step'] = 0 153 | # Exponential moving average of gradient values 154 | state['exp_avg'] = grad.new().resize_as_(grad).zero_() 155 | # Exponential moving average of squared gradient values 156 | state['exp_avg_sq'] = grad.new().resize_as_(grad).zero_() 157 | 158 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 159 | beta1, beta2 = group['betas'] 160 | 161 | state['step'] += 1 162 | 163 | if group['weight_decay'] != 0: 164 | grad = grad.add(group['weight_decay'], p.data) 165 | 166 | # Decay the first and second moment running average coefficient 167 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 168 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 169 | 170 | denom = exp_avg_sq.sqrt().add_(group['eps']) 171 | 172 | bias_correction1 = 1 - beta1 ** state['step'] 173 | bias_correction2 = 1 - beta2 ** state['step'] 174 | step_size = (lr * group['lr'] * math.sqrt(bias_correction2) 175 | / bias_correction1) 176 | 177 | p.data.addcdiv_(-step_size, exp_avg, denom) 178 | 179 | return loss 180 | 181 | 182 | def clip_grad(parameters, clip_val): 183 | r"""Clips gradient values of an iterable of parameters. 184 | Each gradient is clamped element-wise to the range 185 | [-clip_val, clip_val]; gradients are modified in-place. 186 | Note this clips by value, not by norm. 187 | Arguments: 188 | parameters (Iterable[Variable]): an iterable of Variables that will have 189 | gradients clipped 190 | clip_val (float or int): maximal absolute value of the gradients 191 | Returns: 192 | None. No total norm is computed; each gradient entry is 193 | clipped independently of the others. 194 | """ 195 | parameters = list(filter(lambda p: p.grad is not None, parameters)) 196 | for p in parameters: 197 | p.grad.data.clamp_(-clip_val, clip_val) 198 | -------------------------------------------------------------------------------- /pyscripts/inference/pseudo_camrw_crf.py: -------------------------------------------------------------------------------- 1 | """Generate pseudo labels from CAM by random walk and CRF.
2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | import math 6 | 7 | import PIL.Image as Image 8 | import numpy as np 9 | import cv2 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.backends.cudnn as cudnn 13 | from tqdm import tqdm 14 | 15 | import spml.data.transforms as transforms 16 | import spml.utils.general.vis as vis_utils 17 | import spml.utils.general.others as other_utils 18 | from spml.data.datasets.base_dataset import ListDataset 19 | from spml.config.default import config 20 | from spml.config.parse_args import parse_args 21 | from spml.models.embeddings.resnet_pspnet import resnet_101_pspnet 22 | from spml.models.embeddings.resnet_deeplab import resnet_101_deeplab 23 | from spml.models.crf import DenseCRF 24 | 25 | cudnn.enabled = True 26 | cudnn.benchmark = True 27 | 28 | ALPHA=6 29 | WALK_STEPS=6 30 | 31 | 32 | def separate_comma(str_comma): 33 | ints = [int(i) for i in str_comma.split(',')] 34 | return ints 35 | 36 | 37 | def main(): 38 | """Generate pseudo labels from CAM by random walk and CRF. 39 | """ 40 | # Retreve experiment configurations. 41 | args = parse_args('Generate pseudo labels from CAM by random walk and CRF.') 42 | config.network.kmeans_num_clusters = separate_comma(args.kmeans_num_clusters) 43 | config.network.label_divisor = args.label_divisor 44 | 45 | # Create directories to save results. 46 | semantic_dir = os.path.join(args.save_dir, 'semantic_gray') 47 | semantic_rgb_dir = os.path.join(args.save_dir, 'semantic_color') 48 | 49 | # Create color map. 50 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 51 | color_map = color_map.numpy() 52 | 53 | # Create data loaders. 54 | test_dataset = ListDataset( 55 | data_dir=args.data_dir, 56 | data_list=args.data_list, 57 | img_mean=config.network.pixel_means, 58 | img_std=config.network.pixel_stds, 59 | size=None, 60 | random_crop=False, 61 | random_scale=False, 62 | random_mirror=False, 63 | training=False) 64 | test_image_paths = test_dataset.image_paths 65 | 66 | # Define CRF. 67 | postprocessor = DenseCRF( 68 | iter_max=args.crf_iter_max, 69 | pos_xy_std=args.crf_pos_xy_std, 70 | pos_w=args.crf_pos_w, 71 | bi_xy_std=args.crf_bi_xy_std, 72 | bi_rgb_std=args.crf_bi_rgb_std, 73 | bi_w=args.crf_bi_w,) 74 | 75 | # Create models. 76 | if config.network.backbone_types == 'panoptic_pspnet_101': 77 | embedding_model = resnet_101_pspnet(config) 78 | elif config.network.backbone_types == 'panoptic_deeplab_101': 79 | embedding_model = resnet_101_deeplab(config).cuda() 80 | else: 81 | raise ValueError('Not support ' + config.network.backbone_types) 82 | 83 | embedding_model = embedding_model.to("cuda:0") 84 | embedding_model.eval() 85 | 86 | # Load trained weights. 87 | model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth') 88 | save_iter = config.train.max_iteration - 1 89 | embedding_model.load_state_dict( 90 | torch.load(model_path_template.format(save_iter))['embedding_model'], 91 | resume=True) 92 | 93 | # Start inferencing. 94 | for data_index in tqdm(range(len(test_dataset))): 95 | # Image path. 96 | image_path = test_image_paths[data_index] 97 | base_name = os.path.basename(image_path).replace('.jpg', '.png') 98 | 99 | # Image resolution. 
100 | image_batch, label_batch, _ = test_dataset[data_index] 101 | image_h, image_w = image_batch['image'].shape[-2:] 102 | 103 | # Load CAM. 104 | sem_labs = np.unique(label_batch['semantic_label']) 105 | #cam = np.load(os.path.join('/home/twke/repos/SEAM/outputs/train+/cam', base_name.replace('.png', '.npy')), allow_pickle=True).item() 106 | cam = np.load(os.path.join(args.cam_dir, base_name.replace('.png', '.npy')), 107 | allow_pickle=True).item() 108 | cam_full_arr = np.zeros((21, image_h, image_w), np.float32) 109 | for k, v in cam.items(): 110 | cam_full_arr[k+1] = v 111 | cam_full_arr[0] = np.power(1 - np.max(cam_full_arr[1:], axis=0, keepdims=True), ALPHA) 112 | cam_full_arr = torch.from_numpy(cam_full_arr).cuda() 113 | 114 | # Create image pyramid. 115 | batches = other_utils.create_image_pyramid( 116 | image_batch, label_batch, 117 | scales=[1], 118 | is_flip=True) 119 | 120 | affs = [] 121 | for image_batch, label_batch, data_info in batches: 122 | resize_image_h, resize_image_w = image_batch['image'].shape[-2:] 123 | # Crop and pad the input image. 124 | image_batch['image'] = transforms.resize_with_pad( 125 | image_batch['image'].transpose(1, 2, 0), 126 | config.test.crop_size, 127 | image_pad_value=0).transpose(2, 0, 1) 128 | for lab_name in ['semantic_label', 'instance_label']: 129 | label_batch[lab_name] = transforms.resize_with_pad( 130 | label_batch[lab_name], 131 | config.test.crop_size, 132 | image_pad_value=255) 133 | image_batch['image'] = torch.FloatTensor( 134 | image_batch['image'][np.newaxis, ...]).to("cuda:0") 135 | for k, v in label_batch.items(): 136 | label_batch[k] = torch.LongTensor(v[np.newaxis, ...]).to("cuda:0") 137 | pad_image_h, pad_image_w = image_batch['image'].shape[-2:] 138 | 139 | with torch.no_grad(): 140 | embeddings = embedding_model(image_batch, label_batch, resize_as_input=True) 141 | embs = embeddings['embedding'][:, :, :resize_image_h, :resize_image_w] 142 | if data_info['is_flip']: 143 | embs = torch.flip(embs, dims=[3]) 144 | embs = F.interpolate(embs, size=(image_h//8, image_w//8), mode='bilinear') 145 | embs = embs / torch.norm(embs, dim=1, keepdim=True) 146 | embs_flat = embs.view(embs.shape[1], -1) 147 | aff = torch.matmul(embs_flat.t(), embs_flat).mul_(5).add_(-5).exp_() 148 | affs.append(aff) 149 | 150 | aff = torch.mean(torch.stack(affs, dim=0), dim=0) 151 | cam_full_arr = F.interpolate( 152 | cam_full_arr.unsqueeze(0), scale_factor=1/8., mode='bilinear').squeeze(0) 153 | cam_shape = cam_full_arr.shape[-2:] 154 | 155 | # Start random walk.
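# Note on the random walk below: `aff` holds pairwise affinities between
# pixel embeddings on the 8x-downsampled grid, exp(5 * (cos_sim - 1)) after
# the L2-normalization above. Raising the matrix to the 20th power sharpens
# it, column-normalization turns it into a transition matrix, and each
# squaring doubles the number of propagation hops, so WALK_STEPS squarings
# diffuse the CAM scores over 2 ** WALK_STEPS steps:
#   cam_rw = cam_vec @ matrix_power(trans_mat, 2 ** WALK_STEPS)   (conceptually)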
156 | aff_mat = aff ** 20 157 | 158 | trans_mat = aff_mat / torch.sum(aff_mat, dim=0, keepdim=True) 159 | for _ in range(WALK_STEPS): 160 | trans_mat = torch.matmul(trans_mat, trans_mat) 161 | 162 | cam_vec = cam_full_arr.view(21, -1) 163 | cam_rw = torch.matmul(cam_vec, trans_mat) 164 | cam_rw = cam_rw.view(21, cam_shape[0], cam_shape[1]) 165 | 166 | cam_rw = cam_rw.data.cpu().numpy() 167 | cam_rw = cv2.resize(cam_rw.transpose(1, 2, 0), 168 | dsize=(image_w, image_h), 169 | interpolation=cv2.INTER_LINEAR) 170 | cam_rw_pred = np.argmax(cam_rw, axis=-1).astype(np.uint8) 171 | 172 | # CRF 173 | image = image_batch['image'].data.cpu().numpy().astype(np.float32) 174 | image = image[0, :, :image_h, :image_w].transpose(1, 2, 0) 175 | image *= np.reshape(config.network.pixel_stds, (1, 1, 3)) 176 | image += np.reshape(config.network.pixel_means, (1, 1, 3)) 177 | image = image * 255 178 | image = image.astype(np.uint8) 179 | cam_rw = postprocessor(image, cam_rw.transpose(2, 0, 1)) 180 | 181 | cam_rw_pred = np.argmax(cam_rw, axis=0).astype(np.uint8) 182 | 183 | # Save semantic predictions. 184 | semantic_pred = cam_rw_pred 185 | 186 | semantic_pred_name = os.path.join( 187 | semantic_dir, base_name) 188 | if not os.path.isdir(os.path.dirname(semantic_pred_name)): 189 | os.makedirs(os.path.dirname(semantic_pred_name)) 190 | Image.fromarray(semantic_pred, mode='L').save(semantic_pred_name) 191 | 192 | semantic_pred_rgb = color_map[semantic_pred] 193 | semantic_pred_rgb_name = os.path.join( 194 | semantic_rgb_dir, base_name) 195 | if not os.path.isdir(os.path.dirname(semantic_pred_rgb_name)): 196 | os.makedirs(os.path.dirname(semantic_pred_rgb_name)) 197 | Image.fromarray(semantic_pred_rgb, mode='RGB').save( 198 | semantic_pred_rgb_name) 199 | 200 | 201 | if __name__ == '__main__': 202 | main() 203 | -------------------------------------------------------------------------------- /pyscripts/inference/pseudo_softmaxrw_crf.py: -------------------------------------------------------------------------------- 1 | """Generate pseudo labels by softmax classifier, random walk and CRF. 2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | import math 6 | 7 | import PIL.Image as Image 8 | import numpy as np 9 | import cv2 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.backends.cudnn as cudnn 13 | from tqdm import tqdm 14 | 15 | import spml.data.transforms as transforms 16 | import spml.utils.general.vis as vis_utils 17 | import spml.utils.general.others as other_utils 18 | from spml.data.datasets.base_dataset import ListDataset 19 | from spml.config.default import config 20 | from spml.config.parse_args import parse_args 21 | from spml.models.embeddings.resnet_pspnet import resnet_101_pspnet 22 | from spml.models.embeddings.resnet_deeplab import resnet_101_deeplab 23 | from spml.models.predictions.softmax_classifier import softmax_classifier 24 | from spml.models.crf import DenseCRF 25 | 26 | cudnn.enabled = True 27 | cudnn.benchmark = True 28 | 29 | WALK_STEPS = 6 30 | TH = None 31 | 32 | 33 | def main(): 34 | """Generate pseudo labels by softmax classifier, random walk and CRF. 35 | """ 36 | # Retrieve experiment configurations. 37 | args = parse_args( 38 | 'Generate pseudo labels by softmax classifier, random walk and CRF.') 39 | 40 | # Create directories to save results. 41 | semantic_dir = os.path.join(args.save_dir, 'semantic_gray') 42 | semantic_rgb_dir = os.path.join(args.save_dir, 'semantic_color') 43 | 44 | # Create color map.
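# Note: the color map loaded below is a (num_classes, 3) uint8 lookup table;
# indexing it with a label map (color_map[semantic_pred]) later produces the
# RGB rendering saved next to the grayscale predictions.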
45 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 46 | color_map = color_map.numpy() 47 | 48 | # Create data loaders. 49 | test_dataset = ListDataset( 50 | data_dir=args.data_dir, 51 | data_list=args.data_list, 52 | img_mean=config.network.pixel_means, 53 | img_std=config.network.pixel_stds, 54 | size=None, 55 | random_crop=False, 56 | random_scale=False, 57 | random_mirror=False, 58 | training=False) 59 | test_image_paths = test_dataset.image_paths 60 | 61 | # Define CRF. 62 | postprocessor = DenseCRF( 63 | iter_max=args.crf_iter_max, 64 | pos_xy_std=args.crf_pos_xy_std, 65 | pos_w=args.crf_pos_w, 66 | bi_xy_std=args.crf_bi_xy_std, 67 | bi_rgb_std=args.crf_bi_rgb_std, 68 | bi_w=args.crf_bi_w,) 69 | 70 | # Create models. 71 | if config.network.backbone_types == 'panoptic_pspnet_101': 72 | embedding_model = resnet_101_pspnet(config).cuda() 73 | elif config.network.backbone_types == 'panoptic_deeplab_101': 74 | embedding_model = resnet_101_deeplab(config).cuda() 75 | else: 76 | raise ValueError('Unsupported backbone: ' + config.network.backbone_types) 77 | 78 | prediction_model = softmax_classifier(config).cuda() 79 | embedding_model.eval() 80 | prediction_model.eval() 81 | 82 | # Load trained weights. 83 | model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth') 84 | save_iter = config.train.max_iteration - 1 85 | embedding_model.load_state_dict( 86 | torch.load(model_path_template.format(save_iter))['embedding_model'], 87 | resume=True) 88 | prediction_model.load_state_dict( 89 | torch.load(model_path_template.format(save_iter))['prediction_model']) 90 | 91 | # Start inference. 92 | with torch.no_grad(): 93 | for data_index in tqdm(range(len(test_dataset))): 94 | # Image path. 95 | image_path = test_image_paths[data_index] 96 | base_name = os.path.basename(image_path).replace('.jpg', '.png') 97 | 98 | # Image resolution. 99 | original_image_batch, original_label_batch, _ = test_dataset[data_index] 100 | image_h, image_w = original_image_batch['image'].shape[-2:] 101 | 102 | lab_tags = np.unique(original_label_batch['semantic_label']) 103 | lab_tags = lab_tags[lab_tags < config.dataset.num_classes] 104 | label_tags = np.zeros((config.dataset.num_classes,), dtype=bool) 105 | label_tags[lab_tags] = True 106 | label_tags = torch.from_numpy(label_tags).cuda() 107 | 108 | # Create image pyramid. 109 | batches = other_utils.create_image_pyramid( 110 | original_image_batch, original_label_batch, 111 | scales=[1], 112 | is_flip=True) 113 | 114 | affs = [] 115 | semantic_probs = [] 116 | for image_batch, label_batch, data_info in batches: 117 | resize_image_h, resize_image_w = image_batch['image'].shape[-2:] 118 | # Crop and pad the input image.
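# Note: resize_with_pad below only pads the image up to config.test.crop_size
# with zeros; the padded border is cropped away again via
# resize_image_h/resize_image_w before affinities and logits are used, so the
# padding does not leak into the pseudo labels.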
119 | image_batch['image'] = transforms.resize_with_pad( 120 | image_batch['image'].transpose(1, 2, 0), 121 | config.test.crop_size, 122 | image_pad_value=0).transpose(2, 0, 1) 123 | image_batch['image'] = torch.FloatTensor( 124 | image_batch['image'][np.newaxis, ...]).cuda() 125 | pad_image_h, pad_image_w = image_batch['image'].shape[-2:] 126 | 127 | embeddings = embedding_model(image_batch, resize_as_input=True) 128 | outputs = prediction_model(embeddings) 129 | 130 | embs = embeddings['embedding'][:, :, :resize_image_h, :resize_image_w] 131 | semantic_logit = outputs['semantic_logit'][..., :resize_image_h, :resize_image_w] 132 | if data_info['is_flip']: 133 | embs = torch.flip(embs, dims=[3]) 134 | semantic_logit = torch.flip(semantic_logit, dims=[3]) 135 | embs = F.interpolate(embs, size=(image_h//8, image_w//8), mode='bilinear') 136 | embs = embs / torch.norm(embs, dim=1, keepdim=True) 137 | embs_flat = embs.view(embs.shape[1], -1) 138 | aff = torch.matmul(embs_flat.t(), embs_flat).mul_(5).add_(-5).exp_() 139 | affs.append(aff) 140 | 141 | semantic_logit = F.interpolate( 142 | semantic_logit, size=(image_h//8, image_w//8), mode='bilinear') 143 | semantic_prob = F.softmax(semantic_logit, dim=1) 144 | semantic_probs.append(semantic_prob) 145 | 146 | semantic_probs = torch.cat(semantic_probs, dim=0) 147 | semantic_probs = torch.mean(semantic_probs, dim=0) 148 | 149 | # Normalize the CAM per class. 150 | max_prob = torch.max(semantic_probs.view(21, -1), dim=1)[0] 151 | cam_full_arr = semantic_probs / max_prob.view(21, 1, 1) 152 | 153 | cam_shape = cam_full_arr.shape[-2:] 154 | label_tags = (~label_tags).view(-1, 1, 1).expand(-1, cam_shape[0], cam_shape[1]) 155 | cam_full_arr = cam_full_arr.masked_fill(label_tags, 0) 156 | if TH is not None: 157 | cam_full_arr[0] = TH 158 | 159 | aff = torch.mean(torch.stack(affs, dim=0), dim=0) 160 | 161 | # Start random walk. 162 | aff_mat = aff ** 20 163 | 164 | trans_mat = aff_mat / torch.sum(aff_mat, dim=0, keepdim=True) 165 | for _ in range(WALK_STEPS): 166 | trans_mat = torch.matmul(trans_mat, trans_mat) 167 | 168 | cam_vec = cam_full_arr.view(21, -1) 169 | cam_rw = torch.matmul(cam_vec, trans_mat) 170 | cam_rw = cam_rw.view(21, cam_shape[0], cam_shape[1]) 171 | 172 | cam_rw = cam_rw.data.cpu().numpy() 173 | cam_rw = cv2.resize(cam_rw.transpose(1, 2, 0), 174 | dsize=(image_w, image_h), 175 | interpolation=cv2.INTER_LINEAR) 176 | cam_rw_pred = np.argmax(cam_rw, axis=-1).astype(np.uint8) 177 | 178 | # CRF 179 | image = image_batch['image'].data.cpu().numpy().astype(np.float32) 180 | image = image[0, :, :image_h, :image_w].transpose(1, 2, 0) 181 | image *= np.reshape(config.network.pixel_stds, (1, 1, 3)) 182 | image += np.reshape(config.network.pixel_means, (1, 1, 3)) 183 | image = image * 255 184 | image = image.astype(np.uint8) 185 | cam_rw = postprocessor(image, cam_rw.transpose(2, 0, 1)) 186 | 187 | cam_rw_pred = np.argmax(cam_rw, axis=0).astype(np.uint8) 188 | 189 | # Save semantic predictions.
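# Note: two outputs are written below; 'semantic_gray' stores raw class
# indices as a single-channel PNG (mode 'L', suitable for evaluation
# scripts), while 'semantic_color' stores the color-mapped visualization.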
190 | semantic_pred = cam_rw_pred 191 | 192 | semantic_pred_name = os.path.join( 193 | semantic_dir, base_name) 194 | if not os.path.isdir(os.path.dirname(semantic_pred_name)): 195 | os.makedirs(os.path.dirname(semantic_pred_name)) 196 | Image.fromarray(semantic_pred, mode='L').save(semantic_pred_name) 197 | 198 | semantic_pred_rgb = color_map[semantic_pred] 199 | semantic_pred_rgb_name = os.path.join( 200 | semantic_rgb_dir, base_name) 201 | if not os.path.isdir(os.path.dirname(semantic_pred_rgb_name)): 202 | os.makedirs(os.path.dirname(semantic_pred_rgb_name)) 203 | Image.fromarray(semantic_pred_rgb, mode='RGB').save( 204 | semantic_pred_rgb_name) 205 | 206 | 207 | if __name__ == '__main__': 208 | main() 209 | -------------------------------------------------------------------------------- /pyscripts/inference/prototype_msc.py: -------------------------------------------------------------------------------- 1 | """Inference script for generating memory banks. 2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | import math 6 | 7 | import PIL.Image as Image 8 | import numpy as np 9 | import cv2 10 | import torch 11 | import torch.backends.cudnn as cudnn 12 | from tqdm import tqdm 13 | 14 | import spml.data.transforms as transforms 15 | import spml.utils.general.vis as vis_utils 16 | import spml.utils.general.common as common_utils 17 | import spml.utils.general.others as other_utils 18 | from spml.data.datasets.base_dataset import ListDataset 19 | from spml.config.default import config 20 | from spml.config.parse_args import parse_args 21 | import spml.utils.segsort.common as segsort_common 22 | from spml.models.embeddings.resnet_pspnet import resnet_50_pspnet, resnet_101_pspnet 23 | from spml.models.embeddings.resnet_deeplab import resnet_50_deeplab, resnet_101_deeplab 24 | 25 | cudnn.enabled = True 26 | cudnn.benchmark = True 27 | 28 | 29 | def separate_comma(str_comma): 30 | ints = [int(i) for i in str_comma.split(',')] 31 | return ints 32 | 33 | 34 | def main(): 35 | """Inference for generating memory banks. 36 | """ 37 | # Retrieve experiment configurations. 38 | args = parse_args('Inference for generating memory banks.') 39 | config.network.kmeans_num_clusters = separate_comma(args.kmeans_num_clusters) 40 | config.network.label_divisor = args.label_divisor 41 | 42 | # Create directories to save results. 43 | prototype_dir = os.path.join(args.save_dir, 'semantic_prototype') 44 | os.makedirs(prototype_dir, exist_ok=True) 45 | 46 | # Create color map. 47 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 48 | color_map = color_map.numpy() 49 | 50 | # Create data loaders. 51 | test_dataset = ListDataset( 52 | data_dir=args.data_dir, 53 | data_list=args.data_list, 54 | img_mean=config.network.pixel_means, 55 | img_std=config.network.pixel_stds, 56 | size=None, 57 | random_crop=False, 58 | random_scale=False, 59 | random_mirror=False, 60 | training=False) 61 | test_image_paths = test_dataset.image_paths 62 | 63 | # Create models. 64 | if config.network.backbone_types == 'panoptic_pspnet_101': 65 | embedding_model = resnet_101_pspnet(config).cuda() 66 | elif config.network.backbone_types == 'panoptic_deeplab_101': 67 | embedding_model = resnet_101_deeplab(config).cuda() 68 | else: 69 | raise ValueError('Unsupported backbone: ' + config.network.backbone_types) 70 | 71 | embedding_model = embedding_model.cuda() 72 | embedding_model.eval() 73 | 74 | # Load trained weights.
75 | model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth') 76 | save_iter = config.train.max_iteration - 1 77 | embedding_model.load_state_dict( 78 | torch.load(model_path_template.format(save_iter))['embedding_model'], 79 | resume=True) 80 | 81 | # Start inference. 82 | with torch.no_grad(): 83 | for data_index in tqdm(range(len(test_dataset))): 84 | #for data_index in range(3000): 85 | # Image path. 86 | image_path = test_image_paths[data_index] 87 | base_name = os.path.basename(image_path).replace('.jpg', '.png') 88 | 89 | # Image resolution. 90 | image_batch, label_batch, _ = test_dataset[data_index] 91 | image_h, image_w = image_batch['image'].shape[-2:] 92 | batches = other_utils.create_image_pyramid( 93 | image_batch, label_batch, 94 | scales=[0.5, 1, 1.5], 95 | is_flip=False) 96 | 97 | prototype_results = {'prototype': [], 'prototype_label': []} 98 | for image_batch, label_batch, data_info in batches: 99 | resize_image_h, resize_image_w = image_batch['image'].shape[-2:] 100 | # Crop and pad the input image. 101 | image_batch['image'] = transforms.resize_with_pad( 102 | image_batch['image'].transpose(1, 2, 0), 103 | config.test.crop_size, 104 | image_pad_value=0).transpose(2, 0, 1) 105 | image_batch['image'] = torch.FloatTensor( 106 | image_batch['image'][np.newaxis, ...]).to("cuda:0") 107 | pad_image_h, pad_image_w = image_batch['image'].shape[-2:] 108 | 109 | # Create fake labels so that clustering ignores the padded (255) regions. 110 | fake_label_batch = {} 111 | for label_name in ['semantic_label', 'instance_label']: 112 | lab = np.zeros((resize_image_h, resize_image_w), 113 | dtype=np.uint8) 114 | lab = transforms.resize_with_pad( 115 | lab, 116 | config.test.crop_size, 117 | image_pad_value=config.dataset.semantic_ignore_index) 118 | 119 | fake_label_batch[label_name] = torch.LongTensor( 120 | lab[np.newaxis, ...]).to("cuda:0") 121 | 122 | # Put the label batch on GPU. 123 | for k, v in label_batch.items(): 124 | label_batch[k] = torch.LongTensor(v[np.newaxis, ...]).to("cuda:0") 125 | pad_image_h, pad_image_w = image_batch['image'].shape[-2:] 126 | 127 | # Create the ending index of each patch. 128 | stride_h, stride_w = config.test.stride 129 | crop_h, crop_w = config.test.crop_size 130 | npatches_h = math.ceil(1.0 * (pad_image_h-crop_h) / stride_h) + 1 131 | npatches_w = math.ceil(1.0 * (pad_image_w-crop_w) / stride_w) + 1 132 | patch_ind_h = np.linspace( 133 | crop_h, pad_image_h, npatches_h, dtype=np.int32) 134 | patch_ind_w = np.linspace( 135 | crop_w, pad_image_w, npatches_w, dtype=np.int32) 136 | 137 | # Create placeholder for full-resolution embeddings. 138 | embeddings = {} 139 | counts = torch.FloatTensor( 140 | 1, 1, pad_image_h, pad_image_w).zero_().to("cuda:0") 141 | for ind_h in patch_ind_h: 142 | for ind_w in patch_ind_w: 143 | sh, eh = ind_h - crop_h, ind_h 144 | sw, ew = ind_w - crop_w, ind_w 145 | crop_image_batch = { 146 | k: v[:, :, sh:eh, sw:ew] for k, v in image_batch.items()} 147 | 148 | # Feed-forward. 149 | crop_embeddings = embedding_model.generate_embeddings( 150 | crop_image_batch, resize_as_input=True) 151 | 152 | # Initialize embedding.
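# Note on the sliding-window accumulation below: each crop's embedding is
# L2-normalized, summed into a full-resolution buffer, and `counts` tracks
# how many crops covered each pixel; dividing by `counts` afterwards yields
# the per-pixel average embedding over all overlapping crops.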
152 | for name in crop_embeddings: 153 | if crop_embeddings[name] is None: 154 | continue 155 | crop_emb = crop_embeddings[name].to("cuda:0") 156 | if name in ['embedding']: 157 | crop_emb = common_utils.normalize_embedding( 158 | crop_emb.permute(0, 2, 3, 1).contiguous()) 159 | crop_emb = crop_emb.permute(0, 3, 1, 2) 160 | else: 161 | continue 162 | 163 | if name not in embeddings.keys(): 164 | embeddings[name] = torch.FloatTensor( 165 | 1, 166 | crop_emb.shape[1], 167 | pad_image_h, 168 | pad_image_w).zero_().to("cuda:0") 169 | embeddings[name][:, :, sh:eh, sw:ew] += crop_emb 170 | counts[:, :, sh:eh, sw:ew] += 1 171 | 172 | for k in embeddings.keys(): 173 | embeddings[k] /= counts 174 | 175 | # KMeans. 176 | lab_div = config.network.label_divisor 177 | fake_sem_lab = fake_label_batch['semantic_label'] 178 | fake_inst_lab = fake_label_batch['instance_label'] 179 | clustering_outputs = embedding_model.generate_clusters( 180 | embeddings.get('embedding', None), 181 | fake_sem_lab, 182 | fake_inst_lab) 183 | embeddings.update(clustering_outputs) 184 | 185 | # Compute semantic prototypes. 186 | prototypes = segsort_common.calculate_prototypes_from_labels( 187 | embeddings['cluster_embedding'], 188 | embeddings['cluster_index']) 189 | _, prototype_labels = ( 190 | segsort_common.find_majority_label_index( 191 | label_batch['semantic_label'], 192 | embeddings['cluster_index'])) 193 | 194 | prototypes = prototypes.cpu().data.numpy() 195 | prototype_labels = prototype_labels.cpu().data.numpy() 196 | prototype_results['prototype'].append(prototypes) 197 | prototype_results['prototype_label'].append(prototype_labels) 198 | 199 | # Save semantic prototypes. 200 | prototype_name = os.path.join( 201 | prototype_dir, 202 | base_name.replace('.png', '.npy')) 203 | 204 | for k, v in prototype_results.items(): 205 | v = np.concatenate(v, axis=0) 206 | prototype_results[k] = v 207 | np.save(prototype_name, prototype_results) 208 | 209 | 210 | 211 | if __name__ == '__main__': 212 | main() 213 | -------------------------------------------------------------------------------- /pyscripts/inference/pseudo_softmax.py: -------------------------------------------------------------------------------- 1 | """Generate pseudo labels by softmax classifier. 2 | """ 3 | from __future__ import print_function, division 4 | import os 5 | import math 6 | 7 | import PIL.Image as Image 8 | import numpy as np 9 | import cv2 10 | import torch 11 | import torch.nn.functional as F 12 | import torch.backends.cudnn as cudnn 13 | from tqdm import tqdm 14 | 15 | import spml.data.transforms as transforms 16 | import spml.utils.general.vis as vis_utils 17 | import spml.utils.general.others as other_utils 18 | from spml.data.datasets.base_dataset import ListDataset 19 | from spml.config.default import config 20 | from spml.config.parse_args import parse_args 21 | from spml.models.embeddings.resnet_pspnet import resnet_101_pspnet 22 | from spml.models.embeddings.resnet_deeplab import resnet_101_deeplab 23 | from spml.models.predictions.softmax_classifier import softmax_classifier 24 | from spml.models.crf import DenseCRF 25 | 26 | cudnn.enabled = True 27 | cudnn.benchmark = True 28 | 29 | WALK_STEPS = 0 30 | TH = None 31 | 32 | 33 | def main(): 34 | """Generate pseudo labels by softmax classifier. 35 | """ 36 | # Retrieve experiment configurations. 37 | args = parse_args('Generate pseudo labels by softmax classifier.') 38 | 39 | # Create directories to save results.
40 | semantic_dir = os.path.join(args.save_dir, 'semantic_gray') 41 | semantic_rgb_dir = os.path.join(args.save_dir, 'semantic_color') 42 | 43 | # Create color map. 44 | color_map = vis_utils.load_color_map(config.dataset.color_map_path) 45 | color_map = color_map.numpy() 46 | 47 | # Create data loaders. 48 | test_dataset = ListDataset( 49 | data_dir=args.data_dir, 50 | data_list=args.data_list, 51 | img_mean=config.network.pixel_means, 52 | img_std=config.network.pixel_stds, 53 | size=None, 54 | random_crop=False, 55 | random_scale=False, 56 | random_mirror=False, 57 | training=False) 58 | test_image_paths = test_dataset.image_paths 59 | 60 | # Define CRF. 61 | postprocessor = DenseCRF( 62 | iter_max=args.crf_iter_max, 63 | pos_xy_std=args.crf_pos_xy_std, 64 | pos_w=args.crf_pos_w, 65 | bi_xy_std=args.crf_bi_xy_std, 66 | bi_rgb_std=args.crf_bi_rgb_std, 67 | bi_w=args.crf_bi_w,) 68 | 69 | # Create models. 70 | if config.network.backbone_types == 'panoptic_pspnet_101': 71 | embedding_model = resnet_101_pspnet(config).cuda() 72 | elif config.network.backbone_types == 'panoptic_deeplab_101': 73 | embedding_model = resnet_101_deeplab(config).cuda() 74 | else: 75 | raise ValueError('Unsupported backbone: ' + config.network.backbone_types) 76 | 77 | prediction_model = softmax_classifier(config).cuda() 78 | embedding_model.eval() 79 | prediction_model.eval() 80 | 81 | # Load trained weights. 82 | model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth') 83 | save_iter = config.train.max_iteration - 1 84 | embedding_model.load_state_dict( 85 | torch.load(model_path_template.format(save_iter))['embedding_model'], 86 | resume=True) 87 | prediction_model.load_state_dict( 88 | torch.load(model_path_template.format(save_iter))['prediction_model']) 89 | 90 | # Start inference. 91 | with torch.no_grad(): 92 | for data_index in tqdm(range(len(test_dataset))): 93 | # Image path. 94 | image_path = test_image_paths[data_index] 95 | base_name = os.path.basename(image_path).replace('.jpg', '.png') 96 | 97 | # Image resolution. 98 | original_image_batch, original_label_batch, _ = test_dataset[data_index] 99 | image_h, image_w = original_image_batch['image'].shape[-2:] 100 | 101 | lab_tags = np.unique(original_label_batch['semantic_label']) 102 | lab_tags = lab_tags[lab_tags < config.dataset.num_classes] 103 | label_tags = np.zeros((config.dataset.num_classes,), dtype=bool) 104 | label_tags[lab_tags] = True 105 | label_tags = torch.from_numpy(label_tags).cuda() 106 | 107 | # Create image pyramid. 108 | batches = other_utils.create_image_pyramid( 109 | original_image_batch, original_label_batch, 110 | scales=[0.75, 1], 111 | is_flip=True) 112 | 113 | affs = [] 114 | semantic_probs = [] 115 | for image_batch, label_batch, data_info in batches: 116 | resize_image_h, resize_image_w = image_batch['image'].shape[-2:] 117 | # Crop and pad the input image.
118 | image_batch['image'] = transforms.resize_with_pad( 119 | image_batch['image'].transpose(1, 2, 0), 120 | config.test.crop_size, 121 | image_pad_value=0).transpose(2, 0, 1) 122 | image_batch['image'] = torch.FloatTensor( 123 | image_batch['image'][np.newaxis, ...]).cuda() 124 | pad_image_h, pad_image_w = image_batch['image'].shape[-2:] 125 | 126 | embeddings = embedding_model(image_batch, resize_as_input=True) 127 | outputs = prediction_model(embeddings) 128 | 129 | embs = embeddings['embedding'][:, :, :resize_image_h, :resize_image_w] 130 | semantic_logit = outputs['semantic_logit'][..., :resize_image_h, :resize_image_w] 131 | if data_info['is_flip']: 132 | embs = torch.flip(embs, dims=[3]) 133 | semantic_logit = torch.flip(semantic_logit, dims=[3]) 134 | embs = F.interpolate(embs, size=(image_h//8, image_w//8), mode='bilinear') 135 | embs = embs / torch.norm(embs, dim=1, keepdim=True) 136 | embs_flat = embs.view(embs.shape[1], -1) 137 | aff = torch.matmul(embs_flat.t(), embs_flat).mul_(5).add_(-5).exp_() 138 | affs.append(aff) 139 | 140 | semantic_logit = F.interpolate( 141 | semantic_logit, size=(image_h//8, image_w//8), mode='bilinear') 142 | #semantic_prob = F.softmax(semantic_logit, dim=1) 143 | #semantic_probs.append(semantic_prob) 144 | semantic_probs.append(semantic_logit) 145 | 146 | cat_semantic_probs = torch.cat(semantic_probs, dim=0) 147 | #semantic_probs, _ = torch.max(cat_semantic_probs, dim=0) 148 | #semantic_probs[0] = torch.min(cat_semantic_probs[:, 0, :, :], dim=0)[0] 149 | semantic_probs = torch.mean(cat_semantic_probs, dim=0) 150 | semantic_probs = F.softmax(semantic_probs, dim=0) 151 | 152 | # Normalize the CAM per class. 153 | max_prob = torch.max(semantic_probs.view(21, -1), dim=1)[0] 154 | cam_full_arr = semantic_probs / max_prob.view(21, 1, 1) 155 | 156 | cam_shape = cam_full_arr.shape[-2:] 157 | label_tags = (~label_tags).view(-1, 1, 1).expand(-1, cam_shape[0], cam_shape[1]) 158 | cam_full_arr = cam_full_arr.masked_fill(label_tags, 0) 159 | if TH is not None: 160 | cam_full_arr[0] = TH 161 | 162 | aff = torch.mean(torch.stack(affs, dim=0), dim=0) 163 | 164 | # Start random walk. 165 | aff_mat = aff ** 20 166 | 167 | trans_mat = aff_mat / torch.sum(aff_mat, dim=0, keepdim=True) 168 | for _ in range(WALK_STEPS): 169 | trans_mat = torch.matmul(trans_mat, trans_mat) 170 | 171 | cam_vec = cam_full_arr.view(21, -1) 172 | cam_rw = torch.matmul(cam_vec, trans_mat) 173 | cam_rw = cam_rw.view(21, cam_shape[0], cam_shape[1]) 174 | 175 | cam_rw = cam_rw.data.cpu().numpy() 176 | cam_rw = cv2.resize(cam_rw.transpose(1, 2, 0), 177 | dsize=(image_w, image_h), 178 | interpolation=cv2.INTER_LINEAR) 179 | cam_rw_pred = np.argmax(cam_rw, axis=-1).astype(np.uint8) 180 | 181 | # CRF post-processing (disabled in this script; kept for reference). 182 | #image = image_batch['image'].data.cpu().numpy().astype(np.float32) 183 | #image = image[0, :, :image_h, :image_w].transpose(1, 2, 0) 184 | #image *= np.reshape(config.network.pixel_stds, (1, 1, 3)) 185 | #image += np.reshape(config.network.pixel_means, (1, 1, 3)) 186 | #image = image * 255 187 | #image = image.astype(np.uint8) 188 | #cam_rw = postprocessor(image, cam_rw.transpose(2,0,1)) 189 | 190 | #cam_rw_pred = np.argmax(cam_rw, axis=0).astype(np.uint8) 191 | 192 | # Save semantic predictions.
193 | semantic_pred = cam_rw_pred 194 | 195 | semantic_pred_name = os.path.join( 196 | semantic_dir, base_name) 197 | if not os.path.isdir(os.path.dirname(semantic_pred_name)): 198 | os.makedirs(os.path.dirname(semantic_pred_name)) 199 | Image.fromarray(semantic_pred, mode='L').save(semantic_pred_name) 200 | 201 | semantic_pred_rgb = color_map[semantic_pred] 202 | semantic_pred_rgb_name = os.path.join( 203 | semantic_rgb_dir, base_name) 204 | if not os.path.isdir(os.path.dirname(semantic_pred_rgb_name)): 205 | os.makedirs(os.path.dirname(semantic_pred_rgb_name)) 206 | Image.fromarray(semantic_pred_rgb, mode='RGB').save( 207 | semantic_pred_rgb_name) 208 | 209 | 210 | if __name__ == '__main__': 211 | main() 212 | --------------------------------------------------------------------------------