├── .gitignore ├── data_generation ├── refinement_network │ ├── data │ │ ├── __init__.py │ │ ├── base_data_loader.py │ │ ├── data_loader.py │ │ ├── custom_dataset_data_loader.py │ │ ├── single_dataset.py │ │ ├── base_dataset.py │ │ ├── image_folder.py │ │ └── aligned_dataset.py │ ├── models │ │ ├── __init__.py │ │ ├── models.py │ │ ├── base_model.py │ │ ├── test_model.py │ │ └── pix2pix_model.py │ ├── util │ │ ├── __init__.py │ │ ├── image_pool.py │ │ ├── html.py │ │ ├── util.py │ │ └── visualizer.py │ ├── options │ │ ├── __init__.py │ │ ├── test_options.py │ │ ├── train_options.py │ │ └── base_options.py │ ├── scripts │ │ ├── test.sh │ │ └── train.sh │ ├── test.py │ ├── datasets │ │ └── generate_pascal_training_prior.py │ ├── train.py │ └── README.md ├── examples │ ├── examples_origin.jpg │ ├── examples_overlay.jpg │ ├── origin_images │ │ ├── COCO_train2014_000000036827.jpg │ │ └── COCO_train2014_000000131780.jpg │ └── examples.json ├── pose2label.sh ├── demo.sh ├── overlay.py ├── pick_full_person.py ├── merge_parsing_result.py ├── README.md ├── crop_pose_and_generate_testing_prior.py └── generate_prior_util.py ├── parsing_network ├── dataset │ └── README ├── kaffe │ ├── caffe │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── resolver.pyc │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-34.pyc │ │ │ └── resolver.cpython-34.pyc │ │ └── resolver.py │ ├── graph.pyc │ ├── __init__.pyc │ ├── errors.pyc │ ├── layers.pyc │ ├── shapes.pyc │ ├── tensorflow │ │ ├── __init__.py │ │ ├── network.pyc │ │ ├── __init__.pyc │ │ ├── transformer.pyc │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-34.pyc │ │ │ ├── network.cpython-34.pyc │ │ │ └── transformer.cpython-34.pyc │ │ ├── transformer.py │ │ └── network.py │ ├── transformers.pyc │ ├── __pycache__ │ │ ├── graph.cpython-34.pyc │ │ ├── errors.cpython-34.pyc │ │ ├── layers.cpython-34.pyc │ │ ├── shapes.cpython-34.pyc │ │ ├── __init__.cpython-34.pyc │ │ └── transformers.cpython-34.pyc │ ├── __init__.py │ ├── errors.py │ ├── shapes.py │ ├── layers.py │ ├── transformers.py │ └── graph.py ├── output │ └── mj.png ├── misc │ ├── 2007_000129.jpg │ └── 2007_000129.png ├── requirements.txt ├── deeplab_resnet │ ├── model.pyc │ ├── utils.pyc │ ├── __init__.pyc │ ├── image_reader.pyc │ ├── __pycache__ │ │ ├── model.cpython-34.pyc │ │ ├── utils.cpython-34.pyc │ │ ├── __init__.cpython-34.pyc │ │ └── image_reader.cpython-34.pyc │ ├── __init__.py │ ├── utils.py │ └── image_reader.py ├── LICENSE ├── README.md ├── real-time-inference.py ├── inference.py ├── evaluate.py └── train.py ├── imgs ├── comparison.gif └── transferred.jpg ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /data_generation/refinement_network/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_generation/refinement_network/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_generation/refinement_network/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_generation/refinement_network/options/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /parsing_network/dataset/README: -------------------------------------------------------------------------------- 1 | unzip the data `archive.zip` here 2 | -------------------------------------------------------------------------------- /imgs/comparison.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/imgs/comparison.gif -------------------------------------------------------------------------------- /imgs/transferred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/imgs/transferred.jpg -------------------------------------------------------------------------------- /parsing_network/kaffe/caffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .resolver import get_caffe_resolver, has_pycaffe 2 | -------------------------------------------------------------------------------- /parsing_network/output/mj.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/output/mj.png -------------------------------------------------------------------------------- /parsing_network/kaffe/graph.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/graph.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__init__.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/errors.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/errors.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/layers.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/layers.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/shapes.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/shapes.pyc -------------------------------------------------------------------------------- /parsing_network/misc/2007_000129.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/misc/2007_000129.jpg -------------------------------------------------------------------------------- /parsing_network/misc/2007_000129.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/misc/2007_000129.png -------------------------------------------------------------------------------- /parsing_network/requirements.txt: 
-------------------------------------------------------------------------------- 1 | Cython>=0.19.2 2 | numpy>=1.7.1 3 | matplotlib>=1.3.1 4 | Pillow>=2.3.0 5 | six>=1.1.0 6 | -------------------------------------------------------------------------------- /parsing_network/kaffe/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import TensorFlowTransformer 2 | from .network import Network 3 | -------------------------------------------------------------------------------- /parsing_network/kaffe/transformers.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/transformers.pyc -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/model.pyc -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/utils.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/caffe/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/caffe/__init__.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/caffe/resolver.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/caffe/resolver.pyc -------------------------------------------------------------------------------- /data_generation/examples/examples_origin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/data_generation/examples/examples_origin.jpg -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__init__.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/tensorflow/network.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/network.pyc -------------------------------------------------------------------------------- /data_generation/examples/examples_overlay.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/data_generation/examples/examples_overlay.jpg -------------------------------------------------------------------------------- /parsing_network/kaffe/tensorflow/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/__init__.pyc 
-------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/image_reader.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/image_reader.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/tensorflow/transformer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/transformer.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/__pycache__/graph.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/graph.cpython-34.pyc -------------------------------------------------------------------------------- /data_generation/refinement_network/scripts/test.sh: -------------------------------------------------------------------------------- 1 | python3 test.py --dataroot /path/to/dataset --dataset_mode single --model test --output_nc 1 --name exp1 2 | -------------------------------------------------------------------------------- /parsing_network/kaffe/__pycache__/errors.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/errors.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/__pycache__/layers.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/layers.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/__pycache__/shapes.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/shapes.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph import GraphBuilder, NodeMapper 2 | from .errors import KaffeError, print_stderr 3 | 4 | from . 
import tensorflow 5 | -------------------------------------------------------------------------------- /parsing_network/kaffe/__pycache__/__init__.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/__init__.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/errors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class KaffeError(Exception): 4 | pass 5 | 6 | def print_stderr(msg): 7 | sys.stderr.write('%s\n' % msg) 8 | -------------------------------------------------------------------------------- /parsing_network/kaffe/__pycache__/transformers.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/transformers.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/__pycache__/model.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__pycache__/model.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/__pycache__/utils.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__pycache__/utils.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/caffe/__pycache__/__init__.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/caffe/__pycache__/__init__.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/caffe/__pycache__/resolver.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/caffe/__pycache__/resolver.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/__pycache__/__init__.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__pycache__/__init__.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/tensorflow/__pycache__/__init__.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/__pycache__/__init__.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/tensorflow/__pycache__/network.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/__pycache__/network.cpython-34.pyc -------------------------------------------------------------------------------- 
/data_generation/examples/origin_images/COCO_train2014_000000036827.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/data_generation/examples/origin_images/COCO_train2014_000000036827.jpg -------------------------------------------------------------------------------- /data_generation/examples/origin_images/COCO_train2014_000000131780.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/data_generation/examples/origin_images/COCO_train2014_000000131780.jpg -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/__pycache__/image_reader.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__pycache__/image_reader.cpython-34.pyc -------------------------------------------------------------------------------- /parsing_network/kaffe/tensorflow/__pycache__/transformer.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/__pycache__/transformer.cpython-34.pyc -------------------------------------------------------------------------------- /data_generation/refinement_network/scripts/train.sh: -------------------------------------------------------------------------------- 1 | python3 train.py --dataroot /path/to/dataset --dataset_mode aligned --model pix2pix --no_gan --shuffle --n 5 --k 3 --output_nc 1 --name exp1 2 | -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import DeepLabResNetModel 2 | from .image_reader import ImageReader 3 | from .utils import decode_labels, inv_preprocess, prepare_label 4 | -------------------------------------------------------------------------------- /data_generation/refinement_network/data/base_data_loader.py: -------------------------------------------------------------------------------- 1 | class BaseDataLoader(): 2 | def __init__(self): 3 | pass 4 | 5 | def initialize(self, opt): 6 | self.opt = opt 7 | pass 8 | 9 | def load_data(self): 10 | return None 11 | -------------------------------------------------------------------------------- /data_generation/refinement_network/data/data_loader.py: -------------------------------------------------------------------------------- 1 | def CreateDataLoader(opt): 2 | from data.custom_dataset_data_loader import CustomDatasetDataLoader 3 | data_loader = CustomDatasetDataLoader() 4 | print(data_loader.name()) 5 | data_loader.initialize(opt) 6 | return data_loader 7 | -------------------------------------------------------------------------------- /data_generation/refinement_network/models/models.py: -------------------------------------------------------------------------------- 1 | def create_model(opt): 2 | model = None 3 | print(opt.model) 4 | if opt.model == 'pix2pix': 5 | assert(opt.dataset_mode.find('aligned') != -1) 6 | from .pix2pix_model import Pix2PixModel 7 | model = Pix2PixModel() 8 | elif opt.model == 'test': 9 | assert(opt.dataset_mode.find('single') != -1) 10 | from .test_model import TestModel 11 | model = TestModel() 12 | else: 13 | raise 
ValueError("Model [%s] not recognized." % opt.model) 14 | model.initialize(opt) 15 | print("model [%s] was created" % (model.name())) 16 | return model 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | WSHP: Weakly and Semi Supervised Human Body Part Parsing via Pose-Guided Knowledge Transfer 2 | 3 | SOFTWARE LICENSE AGREEMENT 4 | ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY 5 | 6 | BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE. 7 | 8 | This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Shanghai Jiao Tong University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor. 9 | 10 | -------------------------------------------------------------------------------- /data_generation/refinement_network/options/test_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class TestOptions(BaseOptions): 5 | def initialize(self): 6 | BaseOptions.initialize(self) 7 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.') 8 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.') 9 | self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images') 10 | self.parser.add_argument('--phase', type=str, default='test', help='train, test, etc') 11 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') 12 | self.parser.add_argument('--how_many', type=int, default=float("inf"), help='how many test images to run') 13 | self.isTrain = False 14 | -------------------------------------------------------------------------------- /parsing_network/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Vladimir Nekrasov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /data_generation/pose2label.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pascal_pose_file_root="/path/to/pascal_pose_file.csv" 3 | pascal_mask_img_dir="/path/to/pascal_mask_img" 4 | origin_img_root="/path/to/origin_img" 5 | json_file_root="/path/to/pose_json_file" 6 | crop_output_path="/path/to/output/cropped_img_and_prior" 7 | experiment_name="exp1" 8 | merge_output_path="/path/to/output/merged_parsing_label" 9 | overlay_output_path="/path/to/output/overlayed_image" 10 | 11 | python crop_pose_and_generate_testing_prior.py --PASCALPoseFileRoot $pascal_pose_file_root --PASCALMaskImgDir $pascal_mask_img_dir --n 3 --k 3 --aug 0.25 --origin_img_root $origin_img_root --json_file_root $json_file_root --outputDir $crop_output_path 12 | cd refinement_network 13 | python3 test.py --dataroot $crop_output_path --dataset_mode single --model test --output_nc 1 --name $experiment_name --which_epoch latest 14 | cd .. 15 | python merge_parsing_result.py --outputDir $merge_output_path --parsing_root ./refinement_network/results/${experiment_name}/test_latest/images --origin_img_root $origin_img_root --json_file_root $json_file_root --aug 0.25 16 | python overlay.py --origin_img_root $origin_img_root --parsing_img_root $merge_output_path --outputDir $overlay_output_path 17 | -------------------------------------------------------------------------------- /data_generation/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pascal_pose_file_root="./examples/pascalPose.csv" 3 | pascal_mask_img_dir="./examples/pascal_mask" 4 | origin_img_root="./examples/origin_images" 5 | json_file_root="./examples/examples.json" 6 | crop_output_path="./examples/outputs/crop_output" 7 | experiment_name="pretrained_model" 8 | merge_output_path="./examples/outputs/merge_output" 9 | overlay_output_path="./examples/outputs/overlay_output" 10 | 11 | python crop_pose_and_generate_testing_prior.py --PASCALPoseFileRoot $pascal_pose_file_root --PASCALMaskImgDir $pascal_mask_img_dir --n 3 --k 3 --aug 0.25 --origin_img_root $origin_img_root --json_file_root $json_file_root --outputDir $crop_output_path 12 | cd refinement_network 13 | python3 test.py --dataroot ../$crop_output_path --dataset_mode single --model test --output_nc 1 --name $experiment_name --which_epoch latest --checkpoints_dir ../examples --results_dir ../examples 14 | cd .. 
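# The two commands below merge the refined per-person parsing maps back into full-image labels and overlay them on the original images for visual inspection (see merge_parsing_result.py and overlay.py).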
15 | python merge_parsing_result.py --outputDir $merge_output_path --parsing_root ./examples/${experiment_name}/test_latest/images --origin_img_root $origin_img_root --json_file_root $json_file_root --aug 0.25 16 | python overlay.py --origin_img_root $origin_img_root --parsing_img_root $merge_output_path --outputDir $overlay_output_path -------------------------------------------------------------------------------- /data_generation/refinement_network/util/image_pool.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | from torch.autograd import Variable 4 | 5 | 6 | class ImagePool(): 7 | def __init__(self, pool_size): 8 | self.pool_size = pool_size 9 | if self.pool_size > 0: 10 | self.num_imgs = 0 11 | self.images = [] 12 | 13 | def query(self, images): 14 | if self.pool_size == 0: 15 | return Variable(images) 16 | return_images = [] 17 | for image in images: 18 | image = torch.unsqueeze(image, 0) 19 | if self.num_imgs < self.pool_size: 20 | self.num_imgs = self.num_imgs + 1 21 | self.images.append(image) 22 | return_images.append(image) 23 | else: 24 | p = random.uniform(0, 1) 25 | if p > 0.5: 26 | random_id = random.randint(0, self.pool_size - 1) 27 | tmp = self.images[random_id].clone() 28 | self.images[random_id] = image 29 | return_images.append(tmp) 30 | else: 31 | return_images.append(image) 32 | return_images = Variable(torch.cat(return_images, 0)) 33 | return return_images 34 | -------------------------------------------------------------------------------- /data_generation/refinement_network/data/custom_dataset_data_loader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from data.base_data_loader import BaseDataLoader 3 | 4 | 5 | def CreateDataset(opt): 6 | dataset = None 7 | if opt.dataset_mode == 'single': 8 | from data.single_dataset import SingleDataset 9 | dataset = SingleDataset() 10 | elif opt.dataset_mode == 'aligned': 11 | from data.aligned_dataset import AlignedDataset 12 | dataset = AlignedDataset() 13 | else: 14 | raise ValueError("Dataset [%s] not recognized." % opt.dataset_mode) 15 | 16 | print("dataset [%s] was created" % (dataset.name())) 17 | dataset.initialize(opt) 18 | return dataset 19 | 20 | 21 | class CustomDatasetDataLoader(BaseDataLoader): 22 | def name(self): 23 | return 'CustomDatasetDataLoader' 24 | 25 | def initialize(self, opt): 26 | BaseDataLoader.initialize(self, opt) 27 | self.dataset = CreateDataset(opt) 28 | self.dataloader = torch.utils.data.DataLoader( 29 | self.dataset, 30 | batch_size=opt.batchSize, 31 | shuffle=not opt.serial_batches, 32 | num_workers=int(opt.nThreads)) 33 | 34 | def load_data(self): 35 | return self 36 | 37 | def __len__(self): 38 | return min(len(self.dataset), self.opt.max_dataset_size) 39 | 40 | def __iter__(self): 41 | for i, data in enumerate(self.dataloader): 42 | if i >= self.opt.max_dataset_size: 43 | break 44 | yield data 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Weakly and Semi Supervised Human Body Part Parsing via Pose-Guided Knowledge Transfer 3 | 4 | [[arXiv](https://arxiv.org/abs/1805.04310)] 5 | 6 | Transferring human body part parsing labels to raw images by exploiting the anatomical similarity. Some transferred results: 7 | 8 |
![Transferred parsing results](imgs/transferred.jpg) 9 | 10 |
11 | 12 | These results are used as extra training samples for the parsing network and can improve the part segmentation results: 13 | 14 | 15 |
![Comparison of part segmentation results](imgs/comparison.gif) 16 | 17 |
18 | 19 | ## Getting Started 20 | 21 | #### Demo video 22 | 23 | Check out our demo video [here](https://youtu.be/nDqnMpE6b8s). 24 | 25 | #### Parsing Network 26 | 27 | Checkout `parsing_network` for training\testing\demo code of our parsing network. 28 | 29 | #### Data generation 30 | 31 | Checkout `data_generation` for code of using keypoints similarity to transfer parsing knowledge and generate synthetic training labels. 32 | 33 | ## Feedback 34 | 35 | If you get any problems during usage, please open an issue. 36 | 37 | ## Citation 38 | If you use this code for your research, please cite our paper: 39 | 40 | ``` 41 | @article{fang2018wshp, 42 | title={Weakly and Semi Supervised Human Body Part Parsing via Pose-Guided Knowledge Transfer}, 43 | author={Fang, Hao-Shu and Lu, Guansong and Fang, Xiaolin and Xie, Jianwen and Tai, Yu-Wing and Lu, Cewu}, 44 | journal={CVPR}, 45 | year={2018} 46 | } 47 | ``` 48 | -------------------------------------------------------------------------------- /data_generation/refinement_network/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from options.test_options import TestOptions 3 | from data.data_loader import CreateDataLoader 4 | from models.models import create_model 5 | from util.visualizer import Visualizer 6 | from util import html 7 | from collections import OrderedDict 8 | 9 | opt = TestOptions().parse() 10 | opt.nThreads = 1 # test code only supports nThreads = 1 11 | opt.batchSize = 1 # test code only supports batchSize = 1 12 | opt.serial_batches = True # no shuffle 13 | opt.no_flip = True # no flip 14 | 15 | data_loader = CreateDataLoader(opt) 16 | dataset = data_loader.load_data() 17 | model = create_model(opt) 18 | visualizer = Visualizer(opt) 19 | # create website 20 | web_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch)) 21 | webpage = html.HTML(web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' % (opt.name, opt.phase, opt.which_epoch)) 22 | # test 23 | if opt.output_nc == 1: 24 | save_list = ['fake_B_postprocessed', 'fake_B_color'] 25 | else: 26 | save_list = ['fake_B'] 27 | 28 | for i, data in enumerate(dataset): 29 | if i >= opt.how_many: 30 | break 31 | model.set_input(data) 32 | model.test() 33 | visuals = model.get_current_visuals() 34 | visuals_selected = OrderedDict() 35 | for key in save_list: 36 | visuals_selected[key] = visuals[key] 37 | img_path = model.get_image_paths() 38 | print('%04d: process image... %s' % (i, img_path)) 39 | visualizer.save_images(webpage, visuals_selected, img_path, aspect_ratio=opt.aspect_ratio) 40 | 41 | webpage.save() 42 | -------------------------------------------------------------------------------- /parsing_network/kaffe/caffe/resolver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | SHARED_CAFFE_RESOLVER = None 4 | 5 | class CaffeResolver(object): 6 | def __init__(self): 7 | self.import_caffe() 8 | 9 | def import_caffe(self): 10 | self.caffe = None 11 | try: 12 | # Try to import PyCaffe first 13 | import caffe 14 | self.caffe = caffe 15 | except ImportError: 16 | # Fall back to the protobuf implementation 17 | from . import caffepb 18 | self.caffepb = caffepb 19 | show_fallback_warning() 20 | if self.caffe: 21 | # Use the protobuf code from the imported distribution. 22 | # This way, Caffe variants with custom layers will work. 
23 | self.caffepb = self.caffe.proto.caffe_pb2 24 | self.NetParameter = self.caffepb.NetParameter 25 | 26 | def has_pycaffe(self): 27 | return self.caffe is not None 28 | 29 | def get_caffe_resolver(): 30 | global SHARED_CAFFE_RESOLVER 31 | if SHARED_CAFFE_RESOLVER is None: 32 | SHARED_CAFFE_RESOLVER = CaffeResolver() 33 | return SHARED_CAFFE_RESOLVER 34 | 35 | def has_pycaffe(): 36 | return get_caffe_resolver().has_pycaffe() 37 | 38 | def show_fallback_warning(): 39 | msg = ''' 40 | ------------------------------------------------------------ 41 | WARNING: PyCaffe not found! 42 | Falling back to a pure protocol buffer implementation. 43 | * Conversions will be drastically slower. 44 | * This backend is UNTESTED! 45 | ------------------------------------------------------------ 46 | 47 | ''' 48 | sys.stderr.write(msg) 49 | -------------------------------------------------------------------------------- /data_generation/refinement_network/data/single_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from data.base_dataset import BaseDataset, get_transform 3 | from data.image_folder import make_dataset 4 | from PIL import Image 5 | import torchvision.transforms as transforms 6 | import torch 7 | 8 | 9 | class SingleDataset(BaseDataset): 10 | def initialize(self, opt): 11 | self.opt = opt 12 | self.root = opt.dataroot 13 | self.dir_A = os.path.join(opt.dataroot) 14 | self.dir_img = os.path.join(self.dir_A, 'img') 15 | self.dir_prior = os.path.join(self.dir_A, 'prior') 16 | 17 | self.img_paths = sorted(make_dataset(self.dir_img)) 18 | self.prior_paths = sorted(make_dataset(self.dir_prior)) 19 | 20 | def __getitem__(self, index): 21 | img_path = self.img_paths[index] 22 | img = Image.open(img_path).convert('RGB') 23 | img = img.resize((self.opt.fineSize, self.opt.fineSize), Image.BICUBIC) 24 | img = transforms.ToTensor()(img) 25 | 26 | prior_path = self.prior_paths[index] 27 | prior = Image.open(prior_path).convert('RGB') 28 | prior = prior.resize((self.opt.fineSize, self.opt.fineSize), Image.BICUBIC) 29 | prior = transforms.ToTensor()(prior) 30 | 31 | img = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(img) 32 | prior = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(prior) 33 | 34 | if self.opt.which_direction == 'BtoA': 35 | input_nc = self.opt.output_nc 36 | else: 37 | input_nc = self.opt.input_nc 38 | 39 | A = torch.cat([img, prior], dim=0) 40 | 41 | return {'A': A, 'A_paths': img_path} 42 | 43 | def __len__(self): 44 | return len(self.img_paths) 45 | 46 | def name(self): 47 | return 'SingleDataset' 48 | -------------------------------------------------------------------------------- /data_generation/refinement_network/data/base_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import torchvision.transforms as transforms 4 | 5 | 6 | class BaseDataset(data.Dataset): 7 | def __init__(self): 8 | super(BaseDataset, self).__init__() 9 | 10 | def name(self): 11 | return 'BaseDataset' 12 | 13 | def initialize(self, opt): 14 | pass 15 | 16 | 17 | def get_transform(opt): 18 | transform_list = [] 19 | if opt.resize_or_crop == 'resize_and_crop': 20 | osize = [opt.loadSize, opt.loadSize] 21 | transform_list.append(transforms.Scale(osize, Image.BICUBIC)) 22 | transform_list.append(transforms.RandomCrop(opt.fineSize)) 23 | elif opt.resize_or_crop == 'crop': 24 | 
transform_list.append(transforms.RandomCrop(opt.fineSize)) 25 | elif opt.resize_or_crop == 'scale_width': 26 | transform_list.append(transforms.Lambda( 27 | lambda img: __scale_width(img, opt.fineSize))) 28 | elif opt.resize_or_crop == 'scale_width_and_crop': 29 | transform_list.append(transforms.Lambda( 30 | lambda img: __scale_width(img, opt.loadSize))) 31 | transform_list.append(transforms.RandomCrop(opt.fineSize)) 32 | 33 | if opt.isTrain and not opt.no_flip: 34 | transform_list.append(transforms.RandomHorizontalFlip()) 35 | 36 | transform_list += [transforms.ToTensor(), 37 | transforms.Normalize((0.5, 0.5, 0.5), 38 | (0.5, 0.5, 0.5))] 39 | return transforms.Compose(transform_list) 40 | 41 | 42 | def __scale_width(img, target_width): 43 | ow, oh = img.size 44 | if (ow == target_width): 45 | return img 46 | w = target_width 47 | h = int(target_width * oh / ow) 48 | return img.resize((w, h), Image.BICUBIC) 49 | -------------------------------------------------------------------------------- /data_generation/overlay.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Overlay origin image and painted parsing label. 3 | 4 | >>> python overlay.py --origin_img_root /path/to/origin_img --parsing_img_root /path/to/parsing_img --outputDir /path/to/output 5 | >>> 6 | ''' 7 | 8 | import os 9 | import numpy as np 10 | import cv2 11 | import argparse 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument("--origin_img_root", help="path to origin img") 15 | parser.add_argument("--parsing_img_root", help="path to parsing img") 16 | parser.add_argument("--outputDir", help="where to put output files") 17 | parser.add_argument("--factor", type=int, default=1, help='multiply factor') 18 | parser.add_argument("--aug", type=float, default=0, help='augmentation factor for crop') 19 | a = parser.parse_args() 20 | 21 | origin_img_root = a.origin_img_root 22 | parsing_img_root= a.parsing_img_root 23 | output_path = a.outputDir 24 | 25 | body_part_color = np.array([ 26 | [0, 0, 0], 27 | [128, 0, 0], 28 | [0, 128, 0], 29 | [128, 128, 0], 30 | [0, 0, 128], 31 | [128, 0, 128], 32 | [0, 128, 128], 33 | [128, 128, 128], 34 | [64, 0, 0], 35 | [192, 0, 0], 36 | [64, 128, 0]], 37 | dtype=np.uint8) 38 | 39 | def paint(mask_img): 40 | assert(len(mask_img.shape) == 2) 41 | return body_part_color[mask_img] 42 | 43 | def overlay(origin_img, parsing_img): 44 | overlay_img = origin_img*0.7 + parsing_img[:,:,[2,1,0]]*0.9 45 | overlay_img = (overlay_img > 255) * 255 + overlay_img * (overlay_img <= 255) 46 | return overlay_img 47 | 48 | if not os.path.exists(output_path): 49 | os.makedirs(output_path) 50 | 51 | for root, dirs, files in os.walk(parsing_img_root): 52 | for file in files: 53 | origin_img = cv2.imread(os.path.join(origin_img_root, file[0:len(file)-4]+'.jpg')) 54 | parsing_img = cv2.imread(os.path.join(root, file), 0) 55 | overlay_img = overlay(origin_img, paint(parsing_img)) 56 | 57 | cv2.imwrite(os.path.join(output_path, file), overlay_img) 58 | print(file) 59 | -------------------------------------------------------------------------------- /data_generation/refinement_network/models/base_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | 5 | class BaseModel(): 6 | def name(self): 7 | return 'BaseModel' 8 | 9 | def initialize(self, opt): 10 | self.opt = opt 11 | self.gpu_ids = opt.gpu_ids 12 | self.isTrain = opt.isTrain 13 | self.Tensor = torch.cuda.FloatTensor if self.gpu_ids 
else torch.Tensor 14 | self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) 15 | 16 | def set_input(self, input): 17 | self.input = input 18 | 19 | def forward(self): 20 | pass 21 | 22 | # used in test time, no backprop 23 | def test(self): 24 | pass 25 | 26 | def get_image_paths(self): 27 | pass 28 | 29 | def optimize_parameters(self): 30 | pass 31 | 32 | def get_current_visuals(self): 33 | return self.input 34 | 35 | def get_current_errors(self): 36 | return {} 37 | 38 | def save(self, label): 39 | pass 40 | 41 | # helper saving function that can be used by subclasses 42 | def save_network(self, network, network_label, epoch_label, gpu_ids): 43 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 44 | save_path = os.path.join(self.save_dir, save_filename) 45 | torch.save(network.cpu().state_dict(), save_path) 46 | if len(gpu_ids) and torch.cuda.is_available(): 47 | network.cuda(gpu_ids[0]) 48 | 49 | # helper loading function that can be used by subclasses 50 | def load_network(self, network, network_label, epoch_label): 51 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 52 | save_path = os.path.join(self.save_dir, save_filename) 53 | network.load_state_dict(torch.load(save_path)) 54 | 55 | # update learning rate (called once every epoch) 56 | def update_learning_rate(self): 57 | for scheduler in self.schedulers: 58 | scheduler.step() 59 | lr = self.optimizers[0].param_groups[0]['lr'] 60 | print('learning rate = %.7f' % lr) 61 | -------------------------------------------------------------------------------- /data_generation/examples/examples.json: -------------------------------------------------------------------------------- 1 | {"COCO_train2014_000000131780.jpg": {"version": 0.1, "bodies": [{"joints": [317, 294, 0.649612545967, 321, 308, 0.671884775162, 317, 314, 0.750793337822, 315, 330, 0.698100686073, 312, 343, 0.746511995792, 326, 313, 0.782322406769, 327, 330, 0.731025755405, 325, 343, 0.769655823708, 317, 344, 0.579411387444, 318, 365, 0.561899662018, 322, 384, 0.496061146259, 322, 344, 0.657607674599, 317, 365, 0.71800494194, 323, 384, 0.690972328186, 321, 314, 0.271296024323]}, {"joints": [144, 286, 0.677381157875, 143, 298, 0.671079695225, 151, 307, 0.833187520504, 154, 322, 0.788151741028, 144, 319, 0.181723922491, 131, 305, 0.868985891342, 128, 320, 0.708187639713, 138, 318, 0.19234508276, 146, 341, 0.685109257698, 146, 365, 0.792463898659, 146, 385, 0.816615819931, 135, 341, 0.670794785023, 132, 365, 0.780540108681, 128, 385, 0.814137935638, 141, 306, 0.177925795317]}, {"joints": [405, 284, 0.52686548233, 405, 298, 0.623795926571, 414, 306, 0.736892104149, 418, 326, 0.526854753494, 418, 344, 0.315674066544, 399, 305, 0.832540273666, 392, 326, 0.757319688797, 380, 339, 0.743686616421, 409, 346, 0.524510025978, 399, 371, 0.722547531128, 399, 395, 0.832679629326, 402, 345, 0.585395276546, 403, 372, 0.775037169456, 419, 390, 0.778844773769, 406, 306, 0.234650954604]}, {"joints": [369, 295, 0.6036901474, 368, 310, 0.603479743004, 376, 315, 0.740995645523, 380, 329, 0.485697984695, 370, 334, 0.175854563713, 361, 316, 0.79310631752, 354, 333, 0.615259408951, 351, 339, 0.255796700716, 372, 348, 0.525605559349, 373, 369, 0.676989197731, 382, 389, 0.740746855736, 360, 347, 0.509381949902, 356, 370, 0.656241238117, 357, 393, 0.819907128811, 369, 315, 0.233699262142]}]}, "COCO_train2014_000000036827.jpg": {"version": 0.1, "bodies": [{"joints": [361, 96, 0.664693593979, 367, 159, 0.644438028336, 334, 173, 0.775427997112, 323, 247, 0.697471022606, 
304, 298, 0.677327096462, 404, 182, 0.758776426315, 413, 265, 0.746202170849, 391, 326, 0.686128020287, 334, 322, 0.674754321575, 330, 407, 0.811001181602, 334, 479, 0.818583607674, 374, 326, 0.647439479828, 377, 421, 0.769766330719, 430, 511, 0.744681596756, 371, 176, 0.182139545679]}]}} -------------------------------------------------------------------------------- /data_generation/refinement_network/util/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import * 3 | import os 4 | 5 | 6 | class HTML: 7 | def __init__(self, web_dir, title, reflesh=0): 8 | self.title = title 9 | self.web_dir = web_dir 10 | self.img_dir = os.path.join(self.web_dir, 'images') 11 | if not os.path.exists(self.web_dir): 12 | os.makedirs(self.web_dir) 13 | if not os.path.exists(self.img_dir): 14 | os.makedirs(self.img_dir) 15 | # print(self.img_dir) 16 | 17 | self.doc = dominate.document(title=title) 18 | if reflesh > 0: 19 | with self.doc.head: 20 | meta(http_equiv="reflesh", content=str(reflesh)) 21 | 22 | def get_image_dir(self): 23 | return self.img_dir 24 | 25 | def add_header(self, str): 26 | with self.doc: 27 | h3(str) 28 | 29 | def add_table(self, border=1): 30 | self.t = table(border=border, style="table-layout: fixed;") 31 | self.doc.add(self.t) 32 | 33 | def add_images(self, ims, txts, links, width=400): 34 | self.add_table() 35 | with self.t: 36 | with tr(): 37 | for im, txt, link in zip(ims, txts, links): 38 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 39 | with p(): 40 | with a(href=os.path.join('images', link)): 41 | img(style="width:%dpx" % width, src=os.path.join('images', im)) 42 | br() 43 | p(txt) 44 | 45 | def save(self): 46 | html_file = '%s/index.html' % self.web_dir 47 | f = open(html_file, 'wt') 48 | f.write(self.doc.render()) 49 | f.close() 50 | 51 | 52 | if __name__ == '__main__': 53 | html = HTML('web/', 'test_html') 54 | html.add_header('hello world') 55 | 56 | ims = [] 57 | txts = [] 58 | links = [] 59 | for n in range(4): 60 | ims.append('image_%d.png' % n) 61 | txts.append('text_%d' % n) 62 | links.append('image_%d.png' % n) 63 | html.add_images(ims, txts, links) 64 | html.save() 65 | -------------------------------------------------------------------------------- /data_generation/refinement_network/datasets/generate_pascal_training_prior.py: -------------------------------------------------------------------------------- 1 | ''' 2 | We use Pascal dataset, which has both keypoints and segmentation annotations, to generate prior for other dataset which has only keypoints information. 3 | In order to train our refinement network, we need to generate prior for each pascal image, this is what this code for. 
4 | 5 | >>> python generate_pascal_training_prior.py --PASCALPoseFileRoot /path/to/pascal_pose_file.csv --PASCALMaskImgDir /path/to/pascal_mask_img --outputDir /path/to/output --n 5 --k 3 6 | >>> 7 | ''' 8 | 9 | import argparse 10 | from generate_prior_util import * 11 | 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument("--PASCALPoseFileRoot", help="path to PASCAL pose file") 15 | parser.add_argument("--PASCALMaskImgDir", help="path to PASCAL mask images") 16 | parser.add_argument("--outputDir", help="where to put output files") 17 | parser.add_argument("--n", type=int, default=5, help="number of close images picked first time") 18 | parser.add_argument("--k", type=int, default=3, help="number of close images picked for prior generation in n picked images") 19 | opt = parser.parse_args() 20 | 21 | # load PASCAL pose 22 | pascal_poses, pascal_img_names, pascal_pose_dict = load_pascal_pose(opt.PASCALPoseFileRoot) 23 | 24 | if not os.path.exists(opt.outputDir): 25 | os.makedirs(opt.outputDir) 26 | 27 | for i in range(len(pascal_img_names)): 28 | pascal_name = pascal_img_names[i] 29 | print('processing', pascal_name) 30 | pascal_mask_img = cv2.imread(os.path.join(opt.PASCALMaskImgDir, pascal_name + ".png"), 0) 31 | if not os.path.exists(os.path.join(opt.outputDir, pascal_name)): 32 | os.makedirs(os.path.join(opt.outputDir, pascal_name)) 33 | pascal_average_parsing = generate_prior_single_person([0, 0, pascal_mask_img.shape[1] - 1, pascal_mask_img.shape[0] - 1], 34 | pascal_pose_dict[pascal_name], opt.PASCALMaskImgDir, pascal_poses, 35 | pascal_img_names, pascal_pose_dict, opt.n, opt.k, exclude_self=True, save_dir=os.path.join(opt.outputDir, pascal_name)) 36 | pascal_average_parsing = pascal_average_parsing[:, :, [2, 1, 0]] 37 | cv2.imwrite(os.path.join(opt.outputDir, pascal_name + ".png"), pascal_average_parsing) 38 | print(pascal_name, i) 39 | -------------------------------------------------------------------------------- /data_generation/refinement_network/data/image_folder.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Code from 3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py 4 | # Modified the original code so that it also loads images from the current 5 | # directory as well as the subdirectories 6 | ############################################################################### 7 | 8 | import torch.utils.data as data 9 | 10 | from PIL import Image 11 | import os 12 | import os.path 13 | 14 | IMG_EXTENSIONS = [ 15 | '.jpg', '.JPG', '.jpeg', '.JPEG', 16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 17 | ] 18 | 19 | 20 | def is_image_file(filename): 21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 22 | 23 | 24 | def make_dataset(dir): 25 | images = [] 26 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 27 | 28 | for root, _, fnames in sorted(os.walk(dir)): 29 | for fname in fnames: 30 | if is_image_file(fname): 31 | path = os.path.join(root, fname) 32 | images.append(path) 33 | 34 | return images 35 | 36 | 37 | def default_loader(path): 38 | return Image.open(path).convert('RGB') 39 | 40 | 41 | class ImageFolder(data.Dataset): 42 | 43 | def __init__(self, root, transform=None, return_paths=False, 44 | loader=default_loader): 45 | imgs = make_dataset(root) 46 | if len(imgs) == 0: 47 | raise(RuntimeError("Found 0 images in: " + root + "\n" 48 | "Supported image 
extensions are: " + 49 | ",".join(IMG_EXTENSIONS))) 50 | 51 | self.root = root 52 | self.imgs = imgs 53 | self.transform = transform 54 | self.return_paths = return_paths 55 | self.loader = loader 56 | 57 | def __getitem__(self, index): 58 | path = self.imgs[index] 59 | img = self.loader(path) 60 | if self.transform is not None: 61 | img = self.transform(img) 62 | if self.return_paths: 63 | return img, path 64 | else: 65 | return img 66 | 67 | def __len__(self): 68 | return len(self.imgs) 69 | -------------------------------------------------------------------------------- /data_generation/refinement_network/train.py: -------------------------------------------------------------------------------- 1 | import time 2 | from options.train_options import TrainOptions 3 | from data.data_loader import CreateDataLoader 4 | from models.models import create_model 5 | from util.visualizer import Visualizer 6 | 7 | opt = TrainOptions().parse() 8 | data_loader = CreateDataLoader(opt) 9 | dataset = data_loader.load_data() 10 | dataset_size = len(data_loader) 11 | print('#training images = %d' % dataset_size) 12 | 13 | model = create_model(opt) 14 | visualizer = Visualizer(opt) 15 | total_steps = 0 16 | 17 | for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1): 18 | epoch_start_time = time.time() 19 | iter_data_time = time.time() 20 | epoch_iter = 0 21 | 22 | for i, data in enumerate(dataset): 23 | iter_start_time = time.time() 24 | if total_steps % opt.print_freq == 0: 25 | t_data = iter_start_time - iter_data_time 26 | visualizer.reset() 27 | total_steps += opt.batchSize 28 | epoch_iter += opt.batchSize 29 | model.set_input(data) 30 | model.optimize_parameters() 31 | 32 | if total_steps % opt.display_freq == 0: 33 | save_result = total_steps % opt.update_html_freq == 0 34 | visualizer.display_current_results(model.get_current_visuals(), epoch, save_result) 35 | 36 | if total_steps % opt.print_freq == 0: 37 | errors = model.get_current_errors() 38 | t = (time.time() - iter_start_time) / opt.batchSize 39 | visualizer.print_current_errors(epoch, epoch_iter, errors, t, t_data) 40 | if opt.display_id > 0: 41 | visualizer.plot_current_errors(epoch, float(epoch_iter) / dataset_size, opt, errors) 42 | 43 | if total_steps % opt.save_latest_freq == 0: 44 | print('saving the latest model (epoch %d, total_steps %d)' % 45 | (epoch, total_steps)) 46 | model.save('latest') 47 | 48 | iter_data_time = time.time() 49 | if epoch % opt.save_epoch_freq == 0: 50 | print('saving the model at the end of epoch %d, iters %d' % 51 | (epoch, total_steps)) 52 | model.save('latest') 53 | model.save(epoch) 54 | 55 | print('End of epoch %d / %d \t Time Taken: %d sec' % 56 | (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time)) 57 | model.update_learning_rate() 58 | -------------------------------------------------------------------------------- /data_generation/pick_full_person.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from tqdm import tqdm 5 | import matplotlib.pyplot as plt 6 | import os 7 | from PIL import Image 8 | import json 9 | import shutil 10 | import argparse 11 | 12 | def parse_args(): 13 | """Parse input arguments.""" 14 | parser = argparse.ArgumentParser(description='') 15 | parser.add_argument('--outputpath',dest='outputpath', help='path of output', default="") 16 | parser.add_argument('--inputpath',dest='inputpath', help='path of inputpath', default="") 17 | args = parser.parse_args() 18 | 
return args 19 | 20 | def filter_pose(intputpath, outputpath, imgname): 21 | save = True 22 | for pid in range(len(rmpe_results[imgname])): 23 | pose = np.array(rmpe_results[imgname][pid]['keypoints']).reshape(-1,3)[:,:3] 24 | for idx_c in range(16): 25 | if (pose[idx_c,2]) < 0.15: 26 | save = False 27 | break 28 | if save == False: 29 | break 30 | if save == False: 31 | return False 32 | return True 33 | 34 | 35 | if __name__ == '__main__': 36 | args = parse_args() 37 | outputpath = args.outputpath 38 | inputpath = args.inputpath 39 | jsonpath = os.path.join(args.outputpath,"POSE/alpha-pose-results-forvis.json") 40 | 41 | result3={} 42 | with open(jsonpath) as f: 43 | rmpe_results = json.load(f) 44 | for imgname in tqdm(rmpe_results.keys()): 45 | if filter_pose(inputpath, outputpath, imgname): 46 | for pid in range(len(rmpe_results[imgname])): 47 | if imgname not in result3.keys(): 48 | result3[imgname]={} 49 | result3[imgname]['version']=0.1 50 | result3[imgname]['bodies']=[] 51 | tmp={'joints':[]} 52 | indexarr=[27,24,36,33,30,39,42,45,6,3,0,9,12,15,21] 53 | for i in indexarr: 54 | tmp['joints'].append(rmpe_results[imgname][pid]['keypoints'][i]) 55 | tmp['joints'].append(rmpe_results[imgname][pid]['keypoints'][i+1]) 56 | tmp['joints'].append(rmpe_results[imgname][pid]['keypoints'][i+2]) 57 | result3[imgname]['bodies'].append(tmp) 58 | with open("full-person.json",'w') as json_file: 59 | json_file.write(json.dumps(result3)) -------------------------------------------------------------------------------- /data_generation/refinement_network/models/test_model.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | from collections import OrderedDict 3 | import util.util as util 4 | from .base_model import BaseModel 5 | from . 
import networks 6 | 7 | 8 | class TestModel(BaseModel): 9 | def name(self): 10 | return 'TestModel' 11 | 12 | def initialize(self, opt): 13 | assert(not opt.isTrain) 14 | BaseModel.initialize(self, opt) 15 | self.netG = networks.define_G(opt.input_nc, opt.output_nc, 16 | opt.ngf, opt.which_model_netG, 17 | opt.norm, not opt.no_dropout, 18 | opt.init_type, 19 | self.gpu_ids) 20 | which_epoch = opt.which_epoch 21 | self.load_network(self.netG, 'G', which_epoch) 22 | 23 | print('---------- Networks initialized -------------') 24 | networks.print_network(self.netG) 25 | print('-----------------------------------------------') 26 | 27 | def set_input(self, input): 28 | # we need to use single_dataset mode 29 | input_A = input['A'] 30 | if len(self.gpu_ids) > 0: 31 | input_A = input_A.cuda(self.gpu_ids[0], async=True) 32 | self.input_A = input_A 33 | self.image_paths = input['A_paths'] 34 | 35 | def test(self): 36 | self.real_A = Variable(self.input_A) 37 | self.fake_B = self.netG(self.real_A) 38 | 39 | # get image paths 40 | def get_image_paths(self): 41 | return self.image_paths 42 | 43 | def get_current_visuals(self): 44 | real_A_img, real_A_prior = util.tensor2im(self.real_A.data) 45 | fake_B = util.tensor2im(self.fake_B.data) 46 | if self.opt.output_nc == 1: 47 | fake_B_postprocessed = util.postprocess_parsing(fake_B, self.isTrain) 48 | fake_B_color = util.paint_color(fake_B_postprocessed) 49 | if self.opt.output_nc == 1: 50 | return OrderedDict([ 51 | ('real_A_img', real_A_img), 52 | ('real_A_prior', real_A_prior), 53 | ('fake_B', fake_B), 54 | ('fake_B_postprocessed', fake_B_postprocessed), 55 | ('fake_B_color', fake_B_color)] 56 | ) 57 | else: 58 | return OrderedDict([ 59 | ('real_A_img', real_A_img), 60 | ('real_A_prior', real_A_prior), 61 | ('fake_B', fake_B)] 62 | ) 63 | -------------------------------------------------------------------------------- /parsing_network/kaffe/shapes.py: -------------------------------------------------------------------------------- 1 | import math 2 | from collections import namedtuple 3 | 4 | from .errors import KaffeError 5 | 6 | TensorShape = namedtuple('TensorShape', ['batch_size', 'channels', 'height', 'width']) 7 | 8 | 9 | def get_filter_output_shape(i_h, i_w, params, round_func): 10 | o_h = (i_h + 2 * params.pad_h - params.kernel_h) / float(params.stride_h) + 1 11 | o_w = (i_w + 2 * params.pad_w - params.kernel_w) / float(params.stride_w) + 1 12 | return (int(round_func(o_h)), int(round_func(o_w))) 13 | 14 | 15 | def get_strided_kernel_output_shape(node, round_func): 16 | assert node.layer is not None 17 | input_shape = node.get_only_parent().output_shape 18 | o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width, 19 | node.layer.kernel_parameters, round_func) 20 | params = node.layer.parameters 21 | has_c_o = hasattr(params, 'num_output') 22 | c = params.num_output if has_c_o else input_shape.channels 23 | return TensorShape(input_shape.batch_size, c, o_h, o_w) 24 | 25 | 26 | def shape_not_implemented(node): 27 | raise NotImplementedError 28 | 29 | 30 | def shape_identity(node): 31 | assert len(node.parents) > 0 32 | return node.parents[0].output_shape 33 | 34 | 35 | def shape_scalar(node): 36 | return TensorShape(1, 1, 1, 1) 37 | 38 | 39 | def shape_data(node): 40 | if node.output_shape: 41 | # Old-style input specification 42 | return node.output_shape 43 | try: 44 | # New-style input specification 45 | return map(int, node.parameters.shape[0].dim) 46 | except: 47 | # We most likely have a data layer on our hands. 
The problem is, 48 | # Caffe infers the dimensions of the data from the source (eg: LMDB). 49 | # We want to avoid reading datasets here. Fail for now. 50 | # This can be temporarily fixed by transforming the data layer to 51 | # Caffe's "input" layer (as is usually used in the "deploy" version). 52 | # TODO: Find a better solution for this. 53 | raise KaffeError('Cannot determine dimensions of data layer.\n' 54 | 'See comments in function shape_data for more info.') 55 | 56 | 57 | def shape_mem_data(node): 58 | params = node.parameters 59 | return TensorShape(params.batch_size, params.channels, params.height, params.width) 60 | 61 | 62 | def shape_concat(node): 63 | axis = node.layer.parameters.axis 64 | output_shape = None 65 | for parent in node.parents: 66 | if output_shape is None: 67 | output_shape = list(parent.output_shape) 68 | else: 69 | output_shape[axis] += parent.output_shape[axis] 70 | return tuple(output_shape) 71 | 72 | 73 | def shape_convolution(node): 74 | return get_strided_kernel_output_shape(node, math.floor) 75 | 76 | 77 | def shape_pool(node): 78 | return get_strided_kernel_output_shape(node, math.ceil) 79 | 80 | 81 | def shape_inner_product(node): 82 | input_shape = node.get_only_parent().output_shape 83 | return TensorShape(input_shape.batch_size, node.layer.parameters.num_output, 1, 1) 84 | -------------------------------------------------------------------------------- /parsing_network/README.md: -------------------------------------------------------------------------------- 1 | # Parsing Network 2 | 3 | ## Requirements 4 | 5 | TensorFlow (version >= 1.1.0) needs to be installed before running the scripts. 6 | To install the required python packages (except TensorFlow), run 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | or for a local installation 11 | ```bash 12 | pip install -user -r requirements.txt 13 | ``` 14 | 15 | ## Preparation 16 | 17 | To train the network, you first need to download the data from [here](https://pan.baidu.com/s/1ywqpcsvPTsjIY_Slsl9Zhg)(code: 6wqq). It contains data from original Pascal-Person-Part dataset and our generated data. Our released dataset contains over 150K images, which we believe can facilitate the research in the area of human parsing. After downloading the dataset, unzip it to folder `dataset/`. 18 | 19 | Besides, download the pre-trained weights on COCO dataset from 20 | [Baidu Pan](https://pan.baidu.com/s/1AoGJbZ4YHEbV0x89K1-8nQ)(code:r0yb) 21 | or 22 | [Gdrive](https://drive.google.com/file/d/1aPb2rilhYestHL_7jJglmCHv7ScIG8MQ/view?usp=sharingand), 23 | unzip it under current folder. 24 | 25 | ## Training 26 | 27 | Our training method consists of two steps. First, we train our network on the whole dataset. Then we finetune the model on the original dataset. Note that due to time limitation, we only train the network on the whole dataset for 10 epochs. There may be potential performance gains if we train for more epochs. 
28 | ```bash 29 | ## Train our network on the whole dataset, model.ckpt-50000 is the pre-trained weights on COCO dataset 30 | python train.py --data-dir ./dataset/ --data-list dataset/train_all.txt --num-epochs 10 --restore-from models/model.ckpt-50000 --not-restore-last --snapshot-dir snapshots-new-fromcoco --random-scale --random-mirror --save-pred-every 50000 31 | 32 | ## Finetune the model on the original dataset 33 | python train.py --data-dir ./dataset/ --data-list dataset/pascal_train.txt --num-epochs 90 --restore-from snapshots-new-fromcoco/model.ckpt-213129 --snapshot-dir snapshots-new-fromcoco-finetune --random-scale --random-mirror --save-pred-every 10000 34 | ``` 35 | 36 | It takes about two days to train a model on a single Titan X GPU card. 37 | 38 | 39 | ## Evaluation 40 | We provide a pretrained model in `models/final_model/`. 41 | Run the following command to get the evaluation result on the Pascal-Person-Part dataset; it should achieve 64.3% mean intersection-over-union: 42 | ```bash 43 | python evaluate.py --data-dir ./dataset/ --restore-from ./models/final_model/model.ckpt-19315 44 | ``` 45 | 46 | ## Inference 47 | 48 | To perform inference on your own images, use the following command: 49 | ```bash 50 | python inference.py /path/to/img/folder /path/to/model --data_list /path/to/data/list 51 | ``` 52 | This will run the forward pass and save the rendered result in the `./output/` folder: 53 | 54 |

55 | 56 |

57 | 58 | ## Acknowledgement 59 | 60 | This implementation of [DeepLabV2-ResNet](http://liangchiehchen.com/projects/DeepLabv2_resnet.html) is originally from [tensorflow-deeplab-resnet](https://github.com/DrSleep/tensorflow-deeplab-resnet). Thanks DrSleep for his sharing! 61 | -------------------------------------------------------------------------------- /data_generation/refinement_network/options/train_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class TrainOptions(BaseOptions): 5 | def initialize(self): 6 | BaseOptions.initialize(self) 7 | self.parser.add_argument('--display_freq', type=int, default=500, help='frequency of showing training results on screen') 8 | self.parser.add_argument('--display_single_pane_ncols', type=int, default=0, help='if positive, display all images in a single visdom web panel with certain number of images per row.') 9 | self.parser.add_argument('--update_html_freq', type=int, default=500, help='frequency of saving training results to html') 10 | self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') 11 | self.parser.add_argument('--save_latest_freq', type=int, default=30000, help='frequency of saving the latest results') 12 | self.parser.add_argument('--save_epoch_freq', type=int, default=5, help='frequency of saving checkpoints at the end of epochs') 13 | self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') 14 | self.parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by , +, ...') 15 | self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') 16 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? 
set to latest to use latest cached model') 17 | self.parser.add_argument('--niter', type=int, default=200, help='# of iter at starting learning rate') 18 | self.parser.add_argument('--niter_decay', type=int, default=0, help='# of iter to linearly decay learning rate to zero') 19 | self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') 20 | self.parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam') 21 | self.parser.add_argument('--no_lsgan', action='store_true', help='do *not* use least square GAN, if false, use vanilla GAN') 22 | self.parser.add_argument('--no_gan', action='store_true', help='do *not* use gan loss') 23 | self.parser.add_argument('--n', type=int, default=5) 24 | self.parser.add_argument('--k', type=int, default=3) 25 | self.parser.add_argument('--use_l2', action='store_true', help='use l2 loss for netG') 26 | self.parser.add_argument('--shuffle', action='store_true', help='shuffle index for prior') 27 | self.parser.add_argument('--lambda_A', type=float, default=100.0, help='weight for reference loss') 28 | self.parser.add_argument('--pool_size', type=int, default=0, help='the size of image buffer that stores previously generated images') 29 | self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/') 30 | self.parser.add_argument('--lr_policy', type=str, default='lambda', help='learning rate policy: lambda|step|plateau') 31 | self.parser.add_argument('--lr_decay_iters', type=int, default=50, help='multiply by a gamma every lr_decay_iters iterations') 32 | 33 | self.isTrain = True 34 | -------------------------------------------------------------------------------- /data_generation/refinement_network/util/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | import os 6 | 7 | 8 | # Converts a Tensor into a Numpy array 9 | # |imtype|: the desired type of the converted numpy array 10 | def tensor2im(image_tensor, imtype=np.uint8): 11 | image_numpy = image_tensor[0].cpu().float().numpy() 12 | if image_numpy.shape[0] > 3: 13 | image_numpy_1 = image_numpy[0:3, :, :] 14 | image_numpy_2 = image_numpy[3:, :, :] 15 | image_numpy_1 = (np.transpose(image_numpy_1, (1, 2, 0)) + 1) / 2.0 * 255.0 16 | image_numpy_2 = (np.transpose(image_numpy_2, (1, 2, 0)) + 1) / 2.0 * 255.0 17 | return image_numpy_1.astype(imtype), image_numpy_2.astype(imtype) 18 | 19 | if image_numpy.shape[0] == 1: 20 | image_numpy = np.tile(image_numpy, (3, 1, 1)) 21 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 22 | return image_numpy.astype(imtype) 23 | 24 | 25 | body_part_color = np.array([ 26 | [0, 0, 0], 27 | [128, 0, 0], 28 | [0, 128, 0], 29 | [128, 128, 0], 30 | [0, 0, 128], 31 | [128, 0, 128], 32 | [0, 128, 128], 33 | [128, 128, 128], 34 | [64, 0, 0], 35 | [192, 0, 0], 36 | [64, 128, 0]], 37 | dtype=np.uint8) 38 | 39 | 40 | def postprocess_parsing(image_numpy, isTrain): 41 | imtype = image_numpy.dtype 42 | image_numpy = image_numpy[:, :, 0:1] 43 | image_numpy = np.tile(image_numpy, (1, 1, 7)).astype(int) 44 | standard = np.arange(7) 45 | standard *= 35 46 | standard = standard.reshape((1, 1, 7)) 47 | standard = np.tile(standard, (image_numpy.shape[0], image_numpy.shape[1], 1)) 48 | diff = np.abs(image_numpy - standard) 49 | min_index = np.argmin(diff, 
axis=2).reshape(((image_numpy.shape[0], image_numpy.shape[1], 1))) 50 | image_numpy = np.tile(min_index, (1, 1, 3)) 51 | if isTrain: 52 | image_numpy = image_numpy * 35 53 | return image_numpy.astype(imtype) 54 | 55 | 56 | def paint_color(image_numpy): 57 | image_np = image_numpy[:, :, 0] 58 | if image_np.max() > 10: 59 | image_np = image_np / 35 60 | image_np = image_np.astype(np.uint8) 61 | return body_part_color[image_np] 62 | 63 | 64 | def diagnose_network(net, name='network'): 65 | mean = 0.0 66 | count = 0 67 | for param in net.parameters(): 68 | if param.grad is not None: 69 | mean += torch.mean(torch.abs(param.grad.data)) 70 | count += 1 71 | if count > 0: 72 | mean = mean / count 73 | print(name) 74 | print(mean) 75 | 76 | 77 | def save_image(image_numpy, image_path): 78 | image_pil = Image.fromarray(image_numpy) 79 | image_pil.save(image_path) 80 | 81 | 82 | def print_numpy(x, val=True, shp=False): 83 | x = x.astype(np.float64) 84 | if shp: 85 | print('shape,', x.shape) 86 | if val: 87 | x = x.flatten() 88 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( 89 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) 90 | 91 | 92 | def mkdirs(paths): 93 | if isinstance(paths, list) and not isinstance(paths, str): 94 | for path in paths: 95 | mkdir(path) 96 | else: 97 | mkdir(paths) 98 | 99 | 100 | def mkdir(path): 101 | if not os.path.exists(path): 102 | os.makedirs(path) 103 | -------------------------------------------------------------------------------- /parsing_network/real-time-inference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import tensorflow as tf 4 | from deeplab_resnet import DeepLabResNetModel, decode_labels, prepare_label 5 | import argparse 6 | import time 7 | 8 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 9 | 10 | NUM_CLASSES = 7 11 | IMG_SIZE = 512 12 | 13 | input_feed_shape = (1, IMG_SIZE, IMG_SIZE, 3) 14 | 15 | def get_arguments(): 16 | """Parse all the arguments provided from the CLI. 17 | 18 | Returns: 19 | A list of parsed arguments. 20 | """ 21 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference.") 22 | parser.add_argument("--model_weights", type=str, default='./final_model/', 23 | help="Path to the file with model weights.") 24 | parser.add_argument("--num_classes", type=int, default=NUM_CLASSES, 25 | help="Number of classes to predict (including background).") 26 | return parser.parse_args() 27 | 28 | 29 | def load(saver, sess, ckpt_path): 30 | '''Load trained weights. 31 | 32 | Args: 33 | saver: TensorFlow saver object. 34 | sess: TensorFlow session. 35 | ckpt_path: path to checkpoint file with parameters. 36 | ''' 37 | saver.restore(sess, ckpt_path) 38 | print("Restored model parameters from {}".format(ckpt_path)) 39 | 40 | args = get_arguments() 41 | 42 | img_tf = tf.placeholder(dtype=tf.float32, shape=input_feed_shape) 43 | 44 | net = DeepLabResNetModel({'data': img_tf}, is_training=False, num_classes=args.num_classes) 45 | 46 | restore_var = tf.global_variables() 47 | 48 | # Predictions. 
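# The raw 'fc1_voc12' logits are bilinearly upsampled back to the IMG_SIZE x IMG_SIZE
# input resolution and argmaxed over the class dimension, so `pred` ends up with shape
# (1, IMG_SIZE, IMG_SIZE, 1), holding one part label per pixel.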
49 | raw_output = net.layers['fc1_voc12'] 50 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(np.zeros( shape=(IMG_SIZE, IMG_SIZE, 3) ) )[0:2,]) 51 | raw_output_up = tf.argmax(raw_output_up, dimension=3) 52 | pred = tf.expand_dims(raw_output_up, dim=3) 53 | 54 | config = tf.ConfigProto() 55 | config.gpu_options.allow_growth = True 56 | sess = tf.Session(config=config) 57 | init = tf.global_variables_initializer() 58 | 59 | sess.run(init) 60 | 61 | # Load weights. 62 | ckpt = tf.train.get_checkpoint_state(args.model_weights) 63 | loader = tf.train.Saver(var_list=restore_var) 64 | load(loader, sess, ckpt.model_checkpoint_path) 65 | 66 | 67 | def process_frame(frame): 68 | frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE)) 69 | input_img_feed = np.array(frame, dtype=float) 70 | input_img_feed = np.expand_dims(input_img_feed, axis=0) 71 | 72 | start_time = time.time() 73 | preds = sess.run(pred, feed_dict={img_tf: input_img_feed}) 74 | elapsed_time = time.time() - start_time 75 | print("FPS: ", 1 / elapsed_time) 76 | msk = decode_labels(preds, num_classes=NUM_CLASSES) 77 | im = msk[0] 78 | final = cv2.addWeighted(im,0.9,frame,0.7,0) 79 | return final 80 | 81 | def main(): 82 | 83 | cap = cv2.VideoCapture(0) 84 | 85 | i = 1 86 | while(True): 87 | i += 1 88 | # Capture frame-by-frame 89 | ret, frame = cap.read() 90 | 91 | frame_out = process_frame(frame) 92 | #frame_out = cv2.resize(frame, (512,512)) 93 | # Display the resulting frame 94 | cv2.imshow('frame',frame_out) 95 | if cv2.waitKey(1) & 0xFF == ord('q'): 96 | break 97 | 98 | # When everything done, release the capture 99 | cap.release() 100 | cv2.destroyAllWindows() 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /data_generation/merge_parsing_result.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Merge parsing result of cropped poses together to be the label of the whole origin image. 
3 | 4 | >>> python merge_parsing_result.py --outputDir /path/to/output --parsing_root /root_of_refinement_network/results/${experiment_name}/test_latest/images --origin_img_root /path/to/origin_img --json_file_root /path/to/pose_json_file --aug 0.25 5 | >>> 6 | ''' 7 | 8 | import numpy as np 9 | import os 10 | import cv2 11 | import argparse 12 | import json 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("--json_file_root", help="path to json file") 16 | parser.add_argument("--origin_img_root", help="path to origin img") 17 | parser.add_argument("--parsing_root", help="path to parsing results") 18 | parser.add_argument("--outputDir", help="where to put output files") 19 | parser.add_argument("--factor", type=int, default=1, help='multiply factor') 20 | parser.add_argument("--aug", type=float, default=0.25, help='augmentation factor for crop') 21 | opt = parser.parse_args() 22 | 23 | origin_img_root = opt.origin_img_root 24 | json_file_root = opt.json_file_root 25 | parsing_root = opt.parsing_root 26 | 27 | json_file = open(json_file_root, "r") 28 | json_string = json_file.readline() 29 | json_dict = json.loads(json_string) 30 | 31 | if not os.path.exists(opt.outputDir): 32 | os.makedirs(opt.outputDir) 33 | 34 | # coco to pascal keypoints order 35 | coco2pascal = [9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5, 7] 36 | # the 6th keypoint is missing in coco 37 | 38 | num_images = 0 39 | for k, v in json_dict.items(): 40 | num_images += 1 41 | image_id = k 42 | origin_img = cv2.imread(os.path.join(origin_img_root, image_id)) 43 | all_prior = np.zeros(origin_img.shape, dtype=np.uint8) 44 | bodies = v["bodies"] 45 | for i in range(len(bodies)): 46 | ''' 47 | The following process of raw_pose and bbox should be the same as in crop_pose_and_generate_testing_prior.py 48 | ''' 49 | body = bodies[i] 50 | keypoints = body["joints"] 51 | raw_pose = np.zeros((1, 32), dtype=float) 52 | min_x = keypoints[0] 53 | max_x = min_x 54 | min_y = keypoints[1] 55 | max_y = min_y 56 | for j in range(15): 57 | x = keypoints[3*j] 58 | y = keypoints[3*j+1] 59 | raw_pose[0][2*coco2pascal[j]] = x 60 | raw_pose[0][2*coco2pascal[j]+1] = y 61 | if x < min_x: 62 | min_x = x 63 | elif x > max_x: 64 | max_x = x 65 | if y < min_y: 66 | min_y = y 67 | elif y > max_y: 68 | max_y = y 69 | raw_pose[0][2*6] = (raw_pose[0][2*2] + raw_pose[0][2*3]) / 2 70 | raw_pose[0][2*6+1] = (raw_pose[0][2*2+1] + raw_pose[0][2*3+1]) / 2 71 | if max_x > origin_img.shape[1] or max_y > origin_img.shape[0]-1: 72 | print(max_x, max_y) 73 | print(image_id + " pose outside img") 74 | 75 | # deal with bbox 76 | bbox = [min_x, min_y, max_x, max_y] 77 | xaug = int((max_x - min_x + 1) * opt.aug) 78 | yaug = int((max_y - min_y + 1) * opt.aug) 79 | bbox[0] = max(bbox[0] - xaug, 0) 80 | bbox[1] = max(bbox[1] - yaug, 0) 81 | bbox[2] = min(bbox[2] + xaug, origin_img.shape[1]-1) 82 | bbox[3] = min(bbox[3] + yaug, origin_img.shape[0]-1) 83 | print('bbox', bbox) 84 | 85 | prior = cv2.imread(os.path.join(parsing_root, image_id.split('.')[0] + '_' + str(i) + '_fake_B_postprocessed.png')) 86 | prior = cv2.resize(prior, (bbox[2]+1-bbox[0], bbox[3]+1-bbox[1]), interpolation=cv2.INTER_NEAREST) 87 | all_prior[bbox[1]:bbox[3]+1, bbox[0]:bbox[2]+1] = np.maximum(all_prior[bbox[1]:bbox[3]+1, bbox[0]:bbox[2]+1], prior) 88 | 89 | print(image_id, i, num_images) 90 | # all_prior = all_prior + (all_prior == 0) * 255 91 | cv2.imwrite(os.path.join(opt.outputDir, image_id[:len(image_id)-3]+'png'), all_prior[:, :, 0]) 92 | 93 | print('finished') 94 | 
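# The merged label written above stores one part index per pixel (or index * 35 when the
# labels come from a training-mode run), so it looks almost black when viewed directly.
# A quick visual check is to recolor the indices with the part palette and blend the result
# onto the origin image, similar in spirit to what overlay.py produces for the demo.
# The sketch below is only an illustration: the palette matches the one in
# refinement_network/util/util.py, but the file paths are hypothetical.
import cv2
import numpy as np

PALETTE = np.array([
    [0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
    [0, 0, 128], [128, 0, 128], [0, 128, 128]], dtype=np.uint8)  # background + 6 parts (RGB)

def overlay_label(origin_path, label_path, alpha=0.7):
    img = cv2.imread(origin_path)                          # BGR origin image
    label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)   # merged part indices
    if label.max() > 10:                                   # stored as index * 35
        label = label // 35
    color = PALETTE[label][:, :, ::-1]                     # RGB palette -> BGR for OpenCV
    return cv2.addWeighted(img, alpha, color, 0.9, 0)

# Hypothetical paths, for illustration only.
cv2.imwrite('overlay_check.png', overlay_label('origin_img/example.jpg', 'merge_output/example.png'))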
-------------------------------------------------------------------------------- /data_generation/refinement_network/data/aligned_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import random 3 | import torchvision.transforms as transforms 4 | import torch 5 | from data.base_dataset import BaseDataset 6 | from data.image_folder import make_dataset 7 | from PIL import Image 8 | import ntpath 9 | import numpy as np 10 | 11 | 12 | class AlignedDataset(BaseDataset): 13 | def initialize(self, opt): 14 | self.opt = opt 15 | self.root = opt.dataroot 16 | self.dir_AB = os.path.join(opt.dataroot) 17 | self.dir_img = os.path.join(self.dir_AB, 'img') 18 | self.dir_priors = os.path.join(self.dir_AB, 'prior') 19 | self.dir_parsing = os.path.join(self.dir_AB, 'parsing') 20 | 21 | self.img_paths = sorted(make_dataset(self.dir_img)) 22 | self.parsing_paths = sorted(make_dataset(self.dir_parsing)) 23 | assert(opt.resize_or_crop == 'resize_and_crop') 24 | 25 | def __getitem__(self, index): 26 | # img 27 | img_path = self.img_paths[index % self.__len__()] 28 | short_path = ntpath.basename(img_path) 29 | img_name = os.path.splitext(short_path)[0] 30 | img = Image.open(img_path).convert('RGB') 31 | img = img.resize((self.opt.loadSize, self.opt.loadSize), Image.BICUBIC) 32 | img = transforms.ToTensor()(img) 33 | 34 | # prior 35 | prior_indexes = np.arange(self.opt.n) 36 | if self.opt.shuffle: 37 | random.shuffle(prior_indexes) 38 | prior = torch.zeros(3, self.opt.loadSize, self.opt.loadSize) 39 | for i in range(self.opt.k): 40 | morphed_prior_path = os.path.join(self.dir_priors, img_name, str(prior_indexes[i]) + '.png') 41 | morphed_prior = Image.open(morphed_prior_path).convert('RGB') 42 | morphed_prior = morphed_prior.resize((self.opt.loadSize, self.opt.loadSize), Image.BICUBIC) 43 | morphed_prior = transforms.ToTensor()(morphed_prior) 44 | prior += morphed_prior 45 | prior /= self.opt.k 46 | 47 | # parsing 48 | parsing_path = self.parsing_paths[index % self.__len__()] 49 | parsing = Image.open(parsing_path).convert('RGB') 50 | parsing = parsing.resize((self.opt.loadSize, self.opt.loadSize), Image.NEAREST) 51 | parsing = transforms.ToTensor()(parsing) 52 | 53 | w = img.size(2) 54 | h = img.size(1) 55 | w_offset = random.randint(0, max(0, w - self.opt.fineSize - 1)) 56 | h_offset = random.randint(0, max(0, h - self.opt.fineSize - 1)) 57 | 58 | img = img[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize] 59 | prior = prior[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize] 60 | parsing = parsing[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize] 61 | 62 | img = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(img) 63 | prior = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(prior) 64 | parsing = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(parsing) 65 | 66 | if self.opt.which_direction == 'BtoA': 67 | input_nc = self.opt.output_nc 68 | output_nc = self.opt.input_nc 69 | else: 70 | input_nc = self.opt.input_nc 71 | output_nc = self.opt.output_nc 72 | 73 | if (not self.opt.no_flip) and random.random() < 0.5: 74 | idx = [i for i in range(img.size(2) - 1, -1, -1)] 75 | idx = torch.LongTensor(idx) 76 | img = img.index_select(2, idx) 77 | prior = prior.index_select(2, idx) 78 | parsing = parsing.index_select(2, idx) 79 | 80 | A = torch.cat([img, prior], dim=0) 81 | B = parsing 82 | if output_nc == 1: # RGB to gray 83 | tmp = B[0, ...] 
* 0.299 + B[1, ...] * 0.587 + B[2, ...] * 0.114 84 | B = tmp.unsqueeze(0) 85 | 86 | return {'A': A, 'B': B, 87 | 'A_paths': img_path, 'B_paths': parsing_path} 88 | 89 | def __len__(self): 90 | return min(len(self.img_paths), len(self.parsing_paths)) 91 | 92 | def name(self): 93 | return 'AlignedDataset' 94 | -------------------------------------------------------------------------------- /data_generation/refinement_network/README.md: -------------------------------------------------------------------------------- 1 | # Refinement Network 2 | 3 | This is the code for the Refinement Network. We use Refinement Network to generate parsing label for single person. 4 | 5 | ## Prerequisites 6 | - Linux or macOS 7 | - Python 3 8 | - CPU or NVIDIA GPU + CUDA CuDNN 9 | 10 | ## Getting Started 11 | ### Installation 12 | - Install PyTorch 0.3.0 and dependencies from http://pytorch.org 13 | - Install Torch vision from the source. 14 | ```bash 15 | git clone https://github.com/pytorch/vision 16 | cd vision 17 | python3 setup.py install 18 | ``` 19 | 20 | - Install python libraries [visdom](https://github.com/facebookresearch/visdom) and [dominate](https://github.com/Knio/dominate). 21 | ```bash 22 | pip3 install visdom 23 | pip3 install dominate 24 | ``` 25 | 26 | - Clone this repo: 27 | ```bash 28 | git clone https://github.com/MVIG-SJTU/WSHP 29 | cd WSHP/data_generation/refinement_network 30 | ``` 31 | 32 | ### Train/Test 33 | #### Train 34 | - Prepare a training dataset, which should have the following directories: 35 | ``` 36 | /dataroot 37 | /img 38 | img1.ext 39 | img2.ext 40 | ... 41 | /parsing 42 | img1.ext 43 | img2.ext 44 | ... 45 | /prior 46 | /img1 47 | 0.png 48 | 1.png 49 | ... 50 | /img2 51 | 0.png 52 | 1.png 53 | ... 54 | ... 55 | ``` 56 | In our project, we use `Pascal` dataset to train our Refinement Network. But to train refinement network, besides origin image and parsing label, we also need prior images. In `datasets` directory, we give some code to show how to do this. You can use any dataset which has both two kinds of annotations. [Here](https://drive.google.com/open?id=1Ck8_1m74aLGDhawIbsdYsCee9_sFiBcE) is some data you can use. 57 | 58 | - Train a model: 59 | ```bash 60 | #!./scripts/train.sh 61 | python3 train.py --dataroot /path/to/dataset --dataset_mode aligned --model pix2pix --no_gan --shuffle --n 5 --k 3 --output_nc 1 --name exp1 62 | ``` 63 | 64 | - To view training results and loss plots, run `python3 -m visdom.server` and click the URL http://localhost:8097. To see more intermediate results, check out `./checkpoints/exp1/web/index.html` 65 | 66 | #### Test 67 | - Prepare a testing dataset, which should have the following directories: 68 | ``` 69 | /dataroot 70 | /img 71 | img1.ext 72 | img2.ext 73 | ... 74 | /prior 75 | img1.ext 76 | img2.ext 77 | ... 78 | ``` 79 | For more details, please refer to the [parent module](https://github.com/Fang-Haoshu/WSHP/data_generation) where we discuss how to generate prior for dataset which has only keypoints information. 80 | 81 | - Test the model: 82 | ```bash 83 | #!./scripts/test.sh 84 | python3 test.py --dataroot /path/to/dataset --dataset_mode single --model test --output_nc 1 --name exp1 85 | ``` 86 | The test results will be saved to a html file here: `./results/exp1/test_latest/index.html`. 87 | 88 | ## Training/Test Details 89 | - Flags: see `options/train_options.py` and `options/base_options.py` for all the training flags; see `options/test_options.py` and `options/base_options.py` for all the test flags. 
90 | - CPU/GPU (default `--gpu_ids 0`): set`--gpu_ids -1` to use CPU mode; set `--gpu_ids 0,1,2` for multi-GPU mode. You need a large batch size (e.g. `--batchSize 32`) to benefit from multiple GPUs. 91 | - Visualization: during training, the current results can be viewed using two methods. First, if you set `--display_id` > 0, the results and loss plot will appear on a local graphics web server launched by [visdom](https://github.com/facebookresearch/visdom). To do this, you should have `visdom` installed and a server running by the command `python3 -m visdom.server`. The default server URL is `http://localhost:8097`. `display_id` corresponds to the window ID that is displayed on the `visdom` server. The `visdom` display functionality is turned on by default. To avoid the extra overhead of communicating with `visdom` set `--display_id 0`. Second, the intermediate results are saved to `[opt.checkpoints_dir]/[opt.name]/web/` as an HTML file. To avoid this, set `--no_html`. 92 | - Preprocessing: images can be resized and cropped in different ways using `--resize_or_crop` option. The default option `'resize_and_crop'` resizes the image to be of size `(opt.loadSize, opt.loadSize)` and does a random crop of size `(opt.fineSize, opt.fineSize)`. `'crop'` skips the resizing step and only performs random cropping. `'scale_width'` resizes the image to have width `opt.fineSize` while keeping the aspect ratio. `'scale_width_and_crop'` first resizes the image to have width `opt.loadSize` and then does random cropping of size `(opt.fineSize, opt.fineSize)`. 93 | 94 | ## Acknowledgments 95 | Code is inspired by [pytorch-CycleGAN-and-pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix). 96 | -------------------------------------------------------------------------------- /parsing_network/inference.py: -------------------------------------------------------------------------------- 1 | """Run DeepLab-ResNet on a given image. 2 | 3 | This script computes a segmentation mask for a given image. 4 | """ 5 | 6 | from __future__ import print_function 7 | 8 | import argparse 9 | from datetime import datetime 10 | import os 11 | import sys 12 | import time 13 | 14 | from PIL import Image 15 | 16 | import tensorflow as tf 17 | import numpy as np 18 | 19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, prepare_label 20 | 21 | import pdb 22 | 23 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 24 | 25 | NUM_CLASSES = 7 26 | DATA_LIST = './dataset/dance.txt' 27 | SAVE_DIR = './output/' 28 | 29 | def get_arguments(): 30 | """Parse all the arguments provided from the CLI. 31 | 32 | Returns: 33 | A list of parsed arguments. 34 | """ 35 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference.") 36 | parser.add_argument("img_path", type=str, 37 | help="Path to the RGB image file folder.") 38 | parser.add_argument("model_weights", type=str, 39 | help="Path to the file with model weights.") 40 | parser.add_argument("--data_list", type=str, default=DATA_LIST, 41 | help="Path to the image list.") 42 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES, 43 | help="Number of classes to predict (including background).") 44 | parser.add_argument("--save-dir", type=str, default=SAVE_DIR, 45 | help="Where to save predicted mask.") 46 | return parser.parse_args() 47 | 48 | def load(saver, sess, ckpt_path): 49 | '''Load trained weights. 50 | 51 | Args: 52 | saver: TensorFlow saver object. 53 | sess: TensorFlow session. 
54 | ckpt_path: path to checkpoint file with parameters. 55 | ''' 56 | saver.restore(sess, ckpt_path) 57 | print("Restored model parameters from {}".format(ckpt_path)) 58 | 59 | def file_len(fname): 60 | with open(fname) as f: 61 | for i, l in enumerate(f): 62 | pass 63 | return i + 1 64 | 65 | def main(): 66 | """Create the model and start the evaluation process.""" 67 | args = get_arguments() 68 | num_steps = file_len(args.data_list) 69 | # Create queue coordinator. 70 | coord = tf.train.Coordinator() 71 | 72 | # Load reader. 73 | with tf.name_scope("create_inputs"): 74 | reader = ImageReader( 75 | args.img_path, 76 | args.data_list, 77 | None, # No defined input size. 78 | False, # No random scale. 79 | False, # No random mirror. 80 | 255, 81 | IMG_MEAN, 82 | coord) 83 | image, label = reader.image, reader.label 84 | title = reader.queue[0] 85 | image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension. 86 | 87 | # Create network. 88 | net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) 89 | 90 | # Which variables to load. 91 | restore_var = tf.global_variables() 92 | 93 | # Predictions. 94 | raw_output = net.layers['fc1_voc12'] 95 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) 96 | raw_output_up = tf.argmax(raw_output_up, dimension=3) 97 | pred = tf.expand_dims(raw_output_up, dim=3) 98 | 99 | 100 | # Set up TF session and initialize variables. 101 | config = tf.ConfigProto() 102 | config.gpu_options.allow_growth = True 103 | sess = tf.Session(config=config) 104 | init = tf.global_variables_initializer() 105 | 106 | sess.run(init) 107 | 108 | # Load weights. 109 | loader = tf.train.Saver(var_list=restore_var) 110 | load(loader, sess, args.model_weights) 111 | 112 | # Start queue threads. 113 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 114 | 115 | start_time = time.time() 116 | if not os.path.exists(args.save_dir): 117 | os.makedirs(args.save_dir) 118 | # Perform inference. 119 | for step in range(num_steps): 120 | preds, jpg_path = sess.run([pred, title]) 121 | msk = decode_labels(preds, num_classes=args.num_classes) 122 | im = Image.fromarray(msk[0]) 123 | img_o = Image.open(jpg_path) 124 | jpg_path = jpg_path.split('/')[-1].split('.')[0] 125 | img = np.array(im)*0.9 + np.array(img_o)*0.7 126 | img[img>255] = 255 127 | img = Image.fromarray(np.uint8(img)) 128 | img.save(args.save_dir + jpg_path + '.png') 129 | print('Image processed {}.png'.format(jpg_path)) 130 | 131 | total_time = time.time() - start_time 132 | print('The output files have been saved to {}'.format(args.save_dir)) 133 | print('It took {} sec on each image.'.format(total_time/num_steps)) 134 | 135 | if __name__ == '__main__': 136 | main() 137 | -------------------------------------------------------------------------------- /data_generation/README.md: -------------------------------------------------------------------------------- 1 | # Generate Parsing Label 2 | This is the code of generating parsing label for the semi-supervised training of the Parsing Network. 3 | 4 | ## Prerequisites 5 | - Linux or macOS 6 | - Python 2 and 3 7 | 8 | ## Getting Started 9 | ### Installation 10 | - Install python libraries if missing, include opencv-python, numpy, etc. 11 | 12 | - Install PyTorch 0.3.0 and dependencies from http://pytorch.org in Python 3. 13 | - Install Torch vision from the source in Python 3. 
14 | ```bash 15 | git clone https://github.com/pytorch/vision 16 | cd vision 17 | python3 setup.py install 18 | ``` 19 | 20 | - Install the python libraries [visdom](https://github.com/facebookresearch/visdom) and [dominate](https://github.com/Knio/dominate) in Python 3. 21 | ```bash 22 | pip3 install visdom 23 | pip3 install dominate 24 | ``` 25 | 26 | - Clone this repo: 27 | ```bash 28 | git clone https://github.com/MVIG-SJTU/WSHP 29 | cd WSHP/data_generation 30 | ``` 31 | - Download the [demo data](https://drive.google.com/open?id=1N6yYgrulPHqsCRACdbX7MpAWFnYNaAYm) and extract it to the `examples` directory. 32 | 33 | ### Demo 34 | - To run the demo, simply run: 35 | ``` 36 | bash demo.sh 37 | ``` 38 | 39 | - After it finishes, the directories `crop_output`, `merge_output`, and `overlay_output` under `examples/outputs` contain the cropped origin images and priors of single persons, the complete parsing labels of the origin images, and the origin images overlaid with their parsing labels, respectively. 40 | 41 | Origin images: 42 |

43 | 44 |

45 | 46 | Parsing labels: 47 |

48 | 49 |

50 | 51 | ### Run 52 | 1. Prepare keypoint annotations. For dataset without keypoint annotations, one can use keypoint detecion network to detect keypoint. We use [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) by running `./run.sh --indir examples/demo/ --outdir examples/results/ --dataset MPII`. **Note that our algorithm can only apply to images with whole body persons, use `pick_full_person.py` to select such images**. Under the folder `examples`, file `examples.json` is an example output and our code reads keypoint information from a json file with the same format. 53 | 54 | 2. Crop and generate prior. Python script `crop_pose_and_generate_testing_prior.py` shows how to crop out single person from origin images and generate prior given keypoint information (specified by a json file). 55 | ``` 56 | python crop_pose_and_generate_testing_prior.py --PASCALPoseFileRoot /path/to/pascal_pose_file.csv --PASCALMaskImgDir /path/to/pascal_mask_img --n 3 --k 3 --aug 0.25 --origin_img_root /path/to/origin_img --json_file_root /path/to/pose_json_file --outputDir /path/to/output 57 | ``` 58 | 59 | 3. Generate parsing label for single person. In this step we can use the test mode of a pretrained model of refinement network to generate label for each image of single person. This part please refer to submodule `refinement_network`. 60 | 61 | 4. Merge together to get the complete parsing label. Python script `merge_parsing_result.py` shows how to merge together label of different person from the same image to get the complete parsing label for each origin image. 62 | ``` 63 | python merge_parsing_result.py --outputDir /path/to/output --parsing_root /root_of_refinement_network/results/${experiment_name}/test_latest/images --origin_img_root /path/to/origin_img --json_file_root /path/to/pose_json_file --aug 0.25 64 | ``` 65 | 66 | 5. Overlay origin image with corresponding color parsing label to check the results. 67 | ``` 68 | python overlay.py --origin_img_root /path/to/origin_img --prior_img_root /path/to/origin_img --outputDir /path/to/output 69 | ``` 70 | 71 | - Bash script `pose2label.sh` gather steps 2,3,4,5 together, you can use it to get parsing label conveniently without waiting for each step to finish and start the next step. 72 | ``` 73 | bash pose2label.sh 74 | ``` 75 | You have to specify the following parameters at the begining of the script: 76 | ``` 77 | pascal_pose_file_root="/path/to/pascal_pose_file.csv" 78 | pascal_mask_img_dir="/path/to/pascal_mask_img" 79 | origin_img_root="/path/to/origin_img" 80 | json_file_root="/path/to/pose_json_file" 81 | crop_output_path="/path/to/output/cropped_img_and_prior" 82 | experiment_name="exp1" 83 | merge_output_path="/path/to/output/merged_parsing_label" 84 | overlay_output_path="/path/to/output/overlayed_image" 85 | ``` 86 | 87 | - One thing should be noted is the format of keypoints. 88 | The order we use for Pascal images is as follow: 89 | ``` 90 | 0-'right ankle' 1-'right knee' 2-'right hip' 3-'left hip' 4-'left knee' 5-'left ankle' 6-'pelvis' 7-'thorax' 8-'neck' 9-'head' 10-'right wrist' 11-'right elbow' 12-'right shoulder' 13-'left shoulder' 14-'left elbow' 15-'left wrist'. 91 | ``` 92 | Actually, `thorax` is unused and set to `(0, 0)`. And when `pelvis` is missing, we use the midpoint of two hips instead. 93 | The format of the output of [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) is different from this format and we adjust it in our code so that it can be compared with pose of Pascal images. 
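As a concrete illustration of that adjustment, the sketch below reorders one AlphaPose-style `joints` list (15 keypoints given as x, y, score triples) into the 16-slot Pascal order listed above, filling `pelvis` with the midpoint of the two hips. The index table is the same `alphapose2pascal` mapping used in `crop_pose_and_generate_testing_prior.py` and `merge_parsing_result.py`; treat it as a reading aid rather than a replacement for those scripts.
```python
import numpy as np

# Pascal slot for each of the 15 joints in the AlphaPose json;
# slot 6 ('pelvis') has no direct source and is filled from the hips below.
alphapose2pascal = [9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5, 7]

def to_pascal_pose(joints):
    """joints: flat list of 15 * (x, y, score); returns a (16, 2) array of x, y."""
    pose = np.zeros((16, 2), dtype=float)
    for j, slot in enumerate(alphapose2pascal):
        pose[slot] = (joints[3 * j], joints[3 * j + 1])
    pose[6] = (pose[2] + pose[3]) / 2.0  # pelvis = midpoint of right hip and left hip
    return pose
```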
94 | 95 | -------------------------------------------------------------------------------- /parsing_network/evaluate.py: -------------------------------------------------------------------------------- 1 | """Evaluation script for the DeepLab-ResNet network on the validation subset 2 | of PASCAL VOC dataset. 3 | 4 | This script evaluates the model on 1449 validation images. 5 | """ 6 | 7 | from __future__ import print_function 8 | 9 | import argparse 10 | from datetime import datetime 11 | import os 12 | import sys 13 | import time 14 | 15 | import tensorflow as tf 16 | import numpy as np 17 | 18 | from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label 19 | 20 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 21 | 22 | DATA_DIRECTORY = '/home/VOCdevkit' 23 | DATA_LIST_PATH = './dataset/pascal_test.txt' 24 | IGNORE_LABEL = 255 25 | NUM_CLASSES = 7 26 | NUM_STEPS = 1831 # Number of images in the validation set. 27 | RESTORE_FROM = './deeplab_resnet.ckpt' 28 | 29 | def get_arguments(): 30 | """Parse all the arguments provided from the CLI. 31 | 32 | Returns: 33 | A list of parsed arguments. 34 | """ 35 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network") 36 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY, 37 | help="Path to the directory containing the PASCAL VOC dataset.") 38 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH, 39 | help="Path to the file listing the images in the dataset.") 40 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL, 41 | help="The index of the label to ignore during the training.") 42 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES, 43 | help="Number of classes to predict (including background).") 44 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS, 45 | help="Number of images in the validation set.") 46 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM, 47 | help="Where restore model parameters from.") 48 | return parser.parse_args() 49 | 50 | def load(saver, sess, ckpt_path): 51 | '''Load trained weights. 52 | 53 | Args: 54 | saver: TensorFlow saver object. 55 | sess: TensorFlow session. 56 | ckpt_path: path to checkpoint file with parameters. 57 | ''' 58 | saver.restore(sess, ckpt_path) 59 | print("Restored model parameters from {}".format(ckpt_path)) 60 | 61 | def main(): 62 | """Create the model and start the evaluation process.""" 63 | args = get_arguments() 64 | 65 | # Create queue coordinator. 66 | coord = tf.train.Coordinator() 67 | 68 | # Load reader. 69 | with tf.name_scope("create_inputs"): 70 | reader = ImageReader( 71 | args.data_dir, 72 | args.data_list, 73 | None, # No defined input size. 74 | False, # No random scale. 75 | False, # No random mirror. 76 | args.ignore_label, 77 | IMG_MEAN, 78 | coord) 79 | image, label = reader.image, reader.label 80 | image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension. 81 | 82 | # Create network. 83 | net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) 84 | 85 | # Which variables to load. 86 | restore_var = tf.global_variables() 87 | 88 | # Predictions. 89 | raw_output = net.layers['fc1_voc12'] 90 | raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) 91 | raw_output = tf.argmax(raw_output, dimension=3) 92 | pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. 
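# For the metric below, prediction and ground truth are flattened to 1-D and every pixel
# whose label is greater than num_classes - 1 (e.g. the 255 ignore label) gets weight 0,
# so tf.contrib.metrics.streaming_mean_iou only accumulates valid pixels; the accumulated
# mean IoU is read out once after the evaluation loop finishes.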
93 | 94 | # mIoU 95 | pred = tf.reshape(pred, [-1,]) 96 | gt = tf.reshape(label_batch, [-1,]) 97 | weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes. 98 | mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights) 99 | 100 | # Set up tf session and initialize variables. 101 | config = tf.ConfigProto() 102 | config.gpu_options.allow_growth = True 103 | sess = tf.Session(config=config) 104 | init = tf.global_variables_initializer() 105 | 106 | sess.run(init) 107 | sess.run(tf.local_variables_initializer()) 108 | 109 | # Load weights. 110 | loader = tf.train.Saver(var_list=restore_var) 111 | if args.restore_from is not None: 112 | load(loader, sess, args.restore_from) 113 | 114 | # Start queue threads. 115 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 116 | 117 | # Iterate over training steps. 118 | for step in range(args.num_steps): 119 | preds, _ = sess.run([pred, update_op]) 120 | if step % 100 == 0: 121 | print('step {:d}'.format(step)) 122 | print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess))) 123 | coord.request_stop() 124 | coord.join(threads) 125 | 126 | if __name__ == '__main__': 127 | main() 128 | -------------------------------------------------------------------------------- /data_generation/crop_pose_and_generate_testing_prior.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Given a json file containing poses for images(one image may have more than one pose corresponding to different people), 3 | crop out each people and generate corresponding prior. 4 | Then we can use the test mode of the pre-trained refinement model to generate parsing result for each cropped pose. 
5 | 6 | >>> python crop_pose_and_generate_testing_prior.py --PASCALPoseFileRoot /path/to/pascal_pose_file.csv --PASCALMaskImgDir /path/to/pascal_mask_img --n 3 --k 3 --aug 0.25 --origin_img_root /path/to/origin_img --json_file_root /path/to/pose_json_file --outputDir /path/to/output 7 | >>> 8 | ''' 9 | 10 | import argparse 11 | import json 12 | from generate_prior_util import * 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("--PASCALPoseFileRoot", help="path to PASCAL pose file") 16 | parser.add_argument("--PASCALMaskImgDir", help="path to PASCAL mask images") 17 | parser.add_argument("--origin_img_root", help="path to origin img") 18 | parser.add_argument("--json_file_root", help="path to json file") 19 | parser.add_argument("--outputDir", help="where to put output files") 20 | parser.add_argument("--draw_skeleton", action="store_true", help="draw skeleton to check the format of keypoints") 21 | parser.add_argument("--n", type=int, default=5, help="number of close images picked first time") 22 | parser.add_argument("--k", type=int, default=3, help="number of close images picked for prior generation in n picked images") 23 | parser.add_argument("--aug", type=float, default=0.25, help='augmentation factor for crop') 24 | opt = parser.parse_args() 25 | 26 | json_file_root = opt.json_file_root 27 | origin_img_root = opt.origin_img_root 28 | json_file = open(json_file_root, "r") 29 | json_string = json_file.readline() 30 | json_dict = json.loads(json_string) 31 | print('length of json_dict', len(json_dict)) 32 | 33 | pascal_poses, pascal_img_names, pascal_pose_dict = load_pascal_pose(opt.PASCALPoseFileRoot) 34 | print('length of pascal_img', len(pascal_img_names)) 35 | 36 | if not os.path.exists(opt.outputDir): 37 | os.makedirs(opt.outputDir) 38 | img_dir = os.path.join(opt.outputDir, 'img') 39 | prior_dir = os.path.join(opt.outputDir, 'prior') 40 | if not os.path.exists(img_dir): 41 | os.makedirs(img_dir) 42 | if not os.path.exists(prior_dir): 43 | os.makedirs(prior_dir) 44 | 45 | if opt.draw_skeleton: 46 | skeleton_dir = os.path.join(opt.outputDir, 'skeleton') 47 | if not os.path.exists(skeleton_dir): 48 | os.makedirs(skeleton_dir) 49 | 50 | # alphapose to pascal keypoints order 51 | alphapose2pascal = [9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5, 7] 52 | # the 6th keypoint is missing 53 | 54 | num_images = 0 55 | for k, v in json_dict.items(): 56 | num_images += 1 57 | image_id = k 58 | origin_img = cv2.imread(os.path.join(origin_img_root, image_id)) 59 | bodies = v["bodies"] 60 | for i in range(len(bodies)): 61 | body = bodies[i] 62 | keypoints = body["joints"] 63 | raw_pose = np.zeros((1, 32), dtype=float) 64 | min_x = keypoints[0] 65 | max_x = min_x 66 | min_y = keypoints[1] 67 | max_y = min_y 68 | for j in range(15): 69 | x = keypoints[3*j] 70 | y = keypoints[3*j+1] 71 | raw_pose[0][2*alphapose2pascal[j]] = x 72 | raw_pose[0][2*alphapose2pascal[j]+1] = y 73 | if x < min_x: 74 | min_x = x 75 | elif x > max_x: 76 | max_x = x 77 | if y < min_y: 78 | min_y = y 79 | elif y > max_y: 80 | max_y = y 81 | raw_pose[0][2*6] = (raw_pose[0][2*2] + raw_pose[0][2*3]) / 2 82 | raw_pose[0][2*6+1] = (raw_pose[0][2*2+1] + raw_pose[0][2*3+1]) / 2 83 | if max_x > origin_img.shape[1] or max_y > origin_img.shape[0]-1: 84 | print(max_x, max_y) 85 | print(image_id + " pose outside img") 86 | 87 | # deal with bbox 88 | bbox = [min_x, min_y, max_x, max_y] 89 | xaug = int((max_x - min_x + 1) * opt.aug) 90 | yaug = int((max_y - min_y + 1) * opt.aug) 91 | bbox[0] = max(bbox[0] - xaug, 0) 92 
| bbox[1] = max(bbox[1] - yaug, 0) 93 | bbox[2] = min(bbox[2] + xaug, origin_img.shape[1]-1) 94 | bbox[3] = min(bbox[3] + yaug, origin_img.shape[0]-1) 95 | print('bbox', bbox) 96 | 97 | prior = generate_prior_single_person(bbox, raw_pose, opt.PASCALMaskImgDir, pascal_poses, pascal_img_names, pascal_pose_dict, opt.n, opt.k) 98 | prior = prior[:, :, [2, 1, 0]] 99 | img = origin_img[bbox[1]:bbox[3]+1, bbox[0]:bbox[2]+1] 100 | if opt.draw_skeleton: 101 | skeleton_img = drawSkeleton(origin_img, raw_pose) 102 | cv2.imwrite(os.path.join(skeleton_dir, image_id.split('.')[0]+'_'+str(i)+'.jpg'), skeleton_img) 103 | cv2.imwrite(os.path.join(img_dir, image_id.split('.')[0]+'_'+str(i)+'.jpg'), img) 104 | cv2.imwrite(os.path.join(prior_dir, image_id.split('.')[0]+'_'+str(i)+'.jpg'), prior) 105 | 106 | print(image_id, i, num_images) 107 | 108 | print('finished') 109 | -------------------------------------------------------------------------------- /parsing_network/kaffe/layers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numbers 3 | from collections import namedtuple 4 | 5 | from .shapes import * 6 | 7 | LAYER_DESCRIPTORS = { 8 | 9 | # Caffe Types 10 | 'AbsVal': shape_identity, 11 | 'Accuracy': shape_scalar, 12 | 'ArgMax': shape_not_implemented, 13 | 'BatchNorm': shape_identity, 14 | 'BNLL': shape_not_implemented, 15 | 'Concat': shape_concat, 16 | 'ContrastiveLoss': shape_scalar, 17 | 'Convolution': shape_convolution, 18 | 'Deconvolution': shape_not_implemented, 19 | 'Data': shape_data, 20 | 'Dropout': shape_identity, 21 | 'DummyData': shape_data, 22 | 'EuclideanLoss': shape_scalar, 23 | 'Eltwise': shape_identity, 24 | 'Exp': shape_identity, 25 | 'Flatten': shape_not_implemented, 26 | 'HDF5Data': shape_data, 27 | 'HDF5Output': shape_identity, 28 | 'HingeLoss': shape_scalar, 29 | 'Im2col': shape_not_implemented, 30 | 'ImageData': shape_data, 31 | 'InfogainLoss': shape_scalar, 32 | 'InnerProduct': shape_inner_product, 33 | 'Input': shape_data, 34 | 'LRN': shape_identity, 35 | 'MemoryData': shape_mem_data, 36 | 'MultinomialLogisticLoss': shape_scalar, 37 | 'MVN': shape_not_implemented, 38 | 'Pooling': shape_pool, 39 | 'Power': shape_identity, 40 | 'ReLU': shape_identity, 41 | 'Scale': shape_identity, 42 | 'Sigmoid': shape_identity, 43 | 'SigmoidCrossEntropyLoss': shape_scalar, 44 | 'Silence': shape_not_implemented, 45 | 'Softmax': shape_identity, 46 | 'SoftmaxWithLoss': shape_scalar, 47 | 'Split': shape_not_implemented, 48 | 'Slice': shape_not_implemented, 49 | 'TanH': shape_identity, 50 | 'WindowData': shape_not_implemented, 51 | 'Threshold': shape_identity, 52 | } 53 | 54 | LAYER_TYPES = LAYER_DESCRIPTORS.keys() 55 | 56 | LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES}) 57 | 58 | class NodeKind(LayerType): 59 | 60 | @staticmethod 61 | def map_raw_kind(kind): 62 | if kind in LAYER_TYPES: 63 | return kind 64 | return None 65 | 66 | @staticmethod 67 | def compute_output_shape(node): 68 | try: 69 | val = LAYER_DESCRIPTORS[node.kind](node) 70 | return val 71 | except NotImplementedError: 72 | raise KaffeError('Output shape computation not implemented for type: %s' % node.kind) 73 | 74 | 75 | class NodeDispatchError(KaffeError): 76 | 77 | pass 78 | 79 | 80 | class NodeDispatch(object): 81 | 82 | @staticmethod 83 | def get_handler_name(node_kind): 84 | if len(node_kind) <= 4: 85 | # A catch-all for things like ReLU and tanh 86 | return node_kind.lower() 87 | # Convert from CamelCase to under_scored 88 | name = 
re.sub('(.)([A-Z][a-z]+)', r'\1_\2', node_kind) 89 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() 90 | 91 | def get_handler(self, node_kind, prefix): 92 | name = self.get_handler_name(node_kind) 93 | name = '_'.join((prefix, name)) 94 | try: 95 | return getattr(self, name) 96 | except AttributeError: 97 | raise NodeDispatchError('No handler found for node kind: %s (expected: %s)' % 98 | (node_kind, name)) 99 | 100 | 101 | class LayerAdapter(object): 102 | 103 | def __init__(self, layer, kind): 104 | self.layer = layer 105 | self.kind = kind 106 | 107 | @property 108 | def parameters(self): 109 | name = NodeDispatch.get_handler_name(self.kind) 110 | name = '_'.join((name, 'param')) 111 | try: 112 | return getattr(self.layer, name) 113 | except AttributeError: 114 | raise NodeDispatchError('Caffe parameters not found for layer kind: %s' % (self.kind)) 115 | 116 | @staticmethod 117 | def get_kernel_value(scalar, repeated, idx, default=None): 118 | if scalar: 119 | return scalar 120 | if repeated: 121 | if isinstance(repeated, numbers.Number): 122 | return repeated 123 | if len(repeated) == 1: 124 | # Same value applies to all spatial dimensions 125 | return int(repeated[0]) 126 | assert idx < len(repeated) 127 | # Extract the value for the given spatial dimension 128 | return repeated[idx] 129 | if default is None: 130 | raise ValueError('Unable to determine kernel parameter!') 131 | return default 132 | 133 | @property 134 | def kernel_parameters(self): 135 | assert self.kind in (NodeKind.Convolution, NodeKind.Pooling) 136 | params = self.parameters 137 | k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0) 138 | k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1) 139 | s_h = self.get_kernel_value(params.stride_h, params.stride, 0, default=1) 140 | s_w = self.get_kernel_value(params.stride_w, params.stride, 1, default=1) 141 | p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0) 142 | p_w = self.get_kernel_value(params.pad_h, params.pad, 1, default=0) 143 | return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w) 144 | 145 | 146 | KernelParameters = namedtuple('KernelParameters', ['kernel_h', 'kernel_w', 'stride_h', 'stride_w', 147 | 'pad_h', 'pad_w']) 148 | -------------------------------------------------------------------------------- /data_generation/refinement_network/options/base_options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from util import util 4 | import torch 5 | 6 | 7 | class BaseOptions(): 8 | def __init__(self): 9 | self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 10 | self.initialized = False 11 | 12 | def initialize(self): 13 | self.parser.add_argument('--dataroot', required=True, help='path to images. 
' 14 | 'For train(aligned_dataset), should have subdir {img}/{prior}/{parsing}, and {prior} should have subdir containing n prior images for each image in {img}, named by the name of each image;' 15 | 'For test(single_dataset), should have subdir {img}/{prior}') 16 | self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size') 17 | self.parser.add_argument('--loadSize', type=int, default=286, help='scale images to this size') 18 | self.parser.add_argument('--fineSize', type=int, default=256, help='then crop to this size') 19 | self.parser.add_argument('--input_nc', type=int, default=6, help='# of input image channels') 20 | self.parser.add_argument('--output_nc', type=int, default=1, help='# of output image channels') 21 | self.parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in first conv layer') 22 | self.parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in first conv layer') 23 | self.parser.add_argument('--which_model_netD', type=str, default='basic', help='selects model to use for netD') 24 | self.parser.add_argument('--which_model_netG', type=str, default='unet_256', help='selects model to use for netG') 25 | self.parser.add_argument('--n_layers_D', type=int, default=3, help='only used if which_model_netD==n_layers') 26 | self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 27 | self.parser.add_argument('--name', type=str, default='experiment_name', help='name of the experiment. It decides where to store samples and models') 28 | self.parser.add_argument('--dataset_mode', type=str, default='aligned_prior', help='chooses how datasets are loaded. [aligned | single]') 29 | self.parser.add_argument('--model', type=str, default='pix2pix', help='chooses which model to use. pix2pix, test') 30 | self.parser.add_argument('--which_direction', type=str, default='AtoB', help='AtoB or BtoA') 31 | self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data') 32 | self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') 33 | self.parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization') 34 | self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') 35 | self.parser.add_argument('--display_winsize', type=int, default=256, help='display window size') 36 | self.parser.add_argument('--display_id', type=int, default=0, help='window id of the web display') 37 | self.parser.add_argument('--display_port', type=int, default=8097, help='visdom port of the web display') 38 | self.parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator') 39 | self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. 
If the dataset directory contains more than max_dataset_size, only a subset is loaded.') 40 | self.parser.add_argument('--resize_or_crop', type=str, default='resize_and_crop', help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop]') 41 | self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation') 42 | self.parser.add_argument('--init_type', type=str, default='normal', help='network initialization [normal|xavier|kaiming|orthogonal]') 43 | 44 | self.initialized = True 45 | 46 | def parse(self): 47 | if not self.initialized: 48 | self.initialize() 49 | self.opt = self.parser.parse_args() 50 | self.opt.isTrain = self.isTrain # train or test 51 | 52 | str_ids = self.opt.gpu_ids.split(',') 53 | self.opt.gpu_ids = [] 54 | for str_id in str_ids: 55 | id = int(str_id) 56 | if id >= 0: 57 | self.opt.gpu_ids.append(id) 58 | 59 | # set gpu ids 60 | if len(self.opt.gpu_ids) > 0: 61 | torch.cuda.set_device(self.opt.gpu_ids[0]) 62 | 63 | args = vars(self.opt) 64 | 65 | print('------------ Options -------------') 66 | for k, v in sorted(args.items()): 67 | print('%s: %s' % (str(k), str(v))) 68 | print('-------------- End ----------------') 69 | 70 | # save to the disk 71 | expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name) 72 | util.mkdirs(expr_dir) 73 | file_name = os.path.join(expr_dir, 'opt.txt') 74 | with open(file_name, 'wt') as opt_file: 75 | opt_file.write('------------ Options -------------\n') 76 | for k, v in sorted(args.items()): 77 | opt_file.write('%s: %s\n' % (str(k), str(v))) 78 | opt_file.write('-------------- End ----------------\n') 79 | return self.opt 80 | -------------------------------------------------------------------------------- /data_generation/refinement_network/util/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import ntpath 4 | import time 5 | from . import util 6 | from . import html 7 | from scipy.misc import imresize 8 | 9 | 10 | class Visualizer(): 11 | def __init__(self, opt): 12 | # self.opt = opt 13 | self.display_id = opt.display_id 14 | self.use_html = opt.isTrain and not opt.no_html 15 | self.win_size = opt.display_winsize 16 | self.name = opt.name 17 | self.opt = opt 18 | self.saved = False 19 | if self.display_id > 0: 20 | import visdom 21 | self.vis = visdom.Visdom(port=opt.display_port) 22 | 23 | if self.use_html: 24 | self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web') 25 | self.img_dir = os.path.join(self.web_dir, 'images') 26 | print('create web directory %s...' 
% self.web_dir) 27 | util.mkdirs([self.web_dir, self.img_dir]) 28 | self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt') 29 | with open(self.log_name, "a") as log_file: 30 | now = time.strftime("%c") 31 | log_file.write('================ Training Loss (%s) ================\n' % now) 32 | 33 | def reset(self): 34 | self.saved = False 35 | 36 | # |visuals|: dictionary of images to display or save 37 | def display_current_results(self, visuals, epoch, save_result): 38 | if self.display_id > 0: # show images in the browser 39 | ncols = self.opt.display_single_pane_ncols 40 | if ncols > 0: 41 | h, w = next(iter(visuals.values())).shape[:2] 42 | table_css = """""" % (w, h) 46 | title = self.name 47 | label_html = '' 48 | label_html_row = '' 49 | nrows = int(np.ceil(len(visuals.items()) / ncols)) 50 | images = [] 51 | idx = 0 52 | for label, image_numpy in visuals.items(): 53 | label_html_row += '%s' % label 54 | images.append(image_numpy.transpose([2, 0, 1])) 55 | idx += 1 56 | if idx % ncols == 0: 57 | label_html += '%s' % label_html_row 58 | label_html_row = '' 59 | white_image = np.ones_like(image_numpy.transpose([2, 0, 1])) * 255 60 | while idx % ncols != 0: 61 | images.append(white_image) 62 | label_html_row += '' 63 | idx += 1 64 | if label_html_row != '': 65 | label_html += '%s' % label_html_row 66 | # pane col = image row 67 | self.vis.images(images, nrow=ncols, win=self.display_id + 1, 68 | padding=2, opts=dict(title=title + ' images')) 69 | label_html = '%s
' % label_html 70 | self.vis.text(table_css + label_html, win=self.display_id + 2, 71 | opts=dict(title=title + ' labels')) 72 | else: 73 | idx = 1 74 | for label, image_numpy in visuals.items(): 75 | self.vis.image(image_numpy.transpose([2, 0, 1]), opts=dict(title=label), 76 | win=self.display_id + idx) 77 | idx += 1 78 | 79 | if self.use_html and (save_result or not self.saved): # save images to a html file 80 | self.saved = True 81 | for label, image_numpy in visuals.items(): 82 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label)) 83 | util.save_image(image_numpy, img_path) 84 | # update website 85 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1) 86 | for n in range(epoch, 0, -1): 87 | webpage.add_header('epoch [%d]' % n) 88 | ims = [] 89 | txts = [] 90 | links = [] 91 | 92 | for label, image_numpy in visuals.items(): 93 | img_path = 'epoch%.3d_%s.png' % (n, label) 94 | ims.append(img_path) 95 | txts.append(label) 96 | links.append(img_path) 97 | webpage.add_images(ims, txts, links, width=self.win_size) 98 | webpage.save() 99 | 100 | # errors: dictionary of error labels and values 101 | def plot_current_errors(self, epoch, counter_ratio, opt, errors): 102 | if not hasattr(self, 'plot_data'): 103 | self.plot_data = {'X': [], 'Y': [], 'legend': list(errors.keys())} 104 | self.plot_data['X'].append(epoch + counter_ratio) 105 | self.plot_data['Y'].append([errors[k] for k in self.plot_data['legend']]) 106 | self.vis.line( 107 | X=np.stack([np.array(self.plot_data['X'])] * len(self.plot_data['legend']), 1), 108 | Y=np.array(self.plot_data['Y']), 109 | opts={ 110 | 'title': self.name + ' loss over time', 111 | 'legend': self.plot_data['legend'], 112 | 'xlabel': 'epoch', 113 | 'ylabel': 'loss'}, 114 | win=self.display_id) 115 | 116 | # errors: same format as |errors| of plotCurrentErrors 117 | def print_current_errors(self, epoch, i, errors, t, t_data): 118 | message = '(epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % (epoch, i, t, t_data) 119 | for k, v in errors.items(): 120 | message += '%s: %.3f ' % (k, v) 121 | 122 | print(message) 123 | with open(self.log_name, "a") as log_file: 124 | log_file.write('%s\n' % message) 125 | 126 | # save image to the disk 127 | def save_images(self, webpage, visuals, image_path, aspect_ratio=1.0): 128 | image_dir = webpage.get_image_dir() 129 | short_path = ntpath.basename(image_path[0]) 130 | name = os.path.splitext(short_path)[0] 131 | 132 | webpage.add_header(name) 133 | ims = [] 134 | txts = [] 135 | links = [] 136 | 137 | for label, im in visuals.items(): 138 | image_name = '%s_%s.png' % (name, label) 139 | save_path = os.path.join(image_dir, image_name) 140 | h, w, _ = im.shape 141 | if aspect_ratio > 1.0: 142 | im = imresize(im, (h, int(w * aspect_ratio)), interp='bicubic') 143 | if aspect_ratio < 1.0: 144 | im = imresize(im, (int(h / aspect_ratio), w), interp='bicubic') 145 | util.save_image(im, save_path) 146 | 147 | ims.append(image_name) 148 | txts.append(label) 149 | links.append(image_name) 150 | webpage.add_images(ims, txts, links, width=self.win_size) 151 | -------------------------------------------------------------------------------- /data_generation/refinement_network/models/pix2pix_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections import OrderedDict 3 | from torch.autograd import Variable 4 | import util.util as util 5 | from util.image_pool import ImagePool 6 | from .base_model import BaseModel 7 | 
from . import networks 8 | 9 | 10 | class Pix2PixModel(BaseModel): 11 | def name(self): 12 | return 'Pix2PixModel' 13 | 14 | def initialize(self, opt): 15 | BaseModel.initialize(self, opt) 16 | self.isTrain = opt.isTrain 17 | 18 | # load/define networks 19 | self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, 20 | opt.which_model_netG, opt.norm, not opt.no_dropout, opt.init_type, self.gpu_ids) 21 | if self.isTrain and (not opt.no_gan): 22 | use_sigmoid = opt.no_lsgan 23 | self.netD = networks.define_D(opt.input_nc + opt.output_nc, opt.ndf, 24 | opt.which_model_netD, 25 | opt.n_layers_D, opt.norm, use_sigmoid, opt.init_type, self.gpu_ids) 26 | if not self.isTrain or opt.continue_train: 27 | self.load_network(self.netG, 'G', opt.which_epoch) 28 | if self.isTrain and (not opt.no_gan): 29 | self.load_network(self.netD, 'D', opt.which_epoch) 30 | 31 | if self.isTrain: 32 | self.fake_AB_pool = ImagePool(opt.pool_size) 33 | # define loss functions 34 | self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor) 35 | if opt.use_l2: 36 | self.criterionL1 = torch.nn.MSELoss() 37 | else: 38 | self.criterionL1 = torch.nn.L1Loss() 39 | 40 | # initialize optimizers 41 | self.schedulers = [] 42 | self.optimizers = [] 43 | self.optimizer_G = torch.optim.Adam(self.netG.parameters(), 44 | lr=opt.lr, betas=(opt.beta1, 0.999)) 45 | self.optimizers.append(self.optimizer_G) 46 | if not opt.no_gan: 47 | self.optimizer_D = torch.optim.Adam(self.netD.parameters(), 48 | lr=opt.lr, betas=(opt.beta1, 0.999)) 49 | self.optimizers.append(self.optimizer_D) 50 | for optimizer in self.optimizers: 51 | self.schedulers.append(networks.get_scheduler(optimizer, opt)) 52 | 53 | print('---------- Networks initialized -------------') 54 | networks.print_network(self.netG) 55 | if self.isTrain and (not opt.no_gan): 56 | networks.print_network(self.netD) 57 | print('-----------------------------------------------') 58 | 59 | def set_input(self, input): 60 | AtoB = self.opt.which_direction == 'AtoB' 61 | input_A = input['A' if AtoB else 'B'] 62 | input_B = input['B' if AtoB else 'A'] 63 | if len(self.gpu_ids) > 0: 64 | input_A = input_A.cuda(self.gpu_ids[0], async=True) 65 | input_B = input_B.cuda(self.gpu_ids[0], async=True) 66 | self.input_A = input_A 67 | self.input_B = input_B 68 | self.image_paths = input['A_paths' if AtoB else 'B_paths'] 69 | 70 | def forward(self): 71 | self.real_A = Variable(self.input_A) 72 | self.fake_B = self.netG(self.real_A) 73 | self.real_B = Variable(self.input_B) 74 | 75 | # no backprop gradients 76 | def test(self): 77 | self.real_A = Variable(self.input_A, volatile=True) 78 | self.fake_B = self.netG(self.real_A) 79 | self.real_B = Variable(self.input_B, volatile=True) 80 | 81 | # get image paths 82 | def get_image_paths(self): 83 | return self.image_paths 84 | 85 | def backward_D(self): 86 | # Fake 87 | # stop backprop to the generator by detaching fake_B 88 | fake_AB = self.fake_AB_pool.query(torch.cat((self.real_A, self.fake_B), 1).data) 89 | pred_fake = self.netD(fake_AB.detach()) 90 | self.loss_D_fake = self.criterionGAN(pred_fake, False) 91 | 92 | # Real 93 | real_AB = torch.cat((self.real_A, self.real_B), 1) 94 | pred_real = self.netD(real_AB) 95 | self.loss_D_real = self.criterionGAN(pred_real, True) 96 | 97 | # Combined loss 98 | self.loss_D = (self.loss_D_fake + self.loss_D_real) * 0.5 99 | 100 | self.loss_D.backward() 101 | 102 | def backward_G(self): 103 | if not self.opt.no_gan: 104 | # First, G(A) should fake the discriminator 105 | fake_AB = 
torch.cat((self.real_A, self.fake_B), 1) 106 | pred_fake = self.netD(fake_AB) 107 | self.loss_G_GAN = self.criterionGAN(pred_fake, True) 108 | else: 109 | self.loss_G_GAN = 0 110 | 111 | # Second, G(A) = B 112 | self.loss_G_L1 = self.criterionL1(self.fake_B, self.real_B) * self.opt.lambda_A 113 | 114 | self.loss_G = self.loss_G_GAN + self.loss_G_L1 115 | 116 | self.loss_G.backward() 117 | 118 | def optimize_parameters(self): 119 | self.forward() 120 | if not self.opt.no_gan: 121 | self.optimizer_D.zero_grad() 122 | self.backward_D() 123 | self.optimizer_D.step() 124 | 125 | self.optimizer_G.zero_grad() 126 | self.backward_G() 127 | self.optimizer_G.step() 128 | 129 | def get_current_errors(self): 130 | if not self.opt.no_gan: 131 | return OrderedDict([('G_GAN', self.loss_G_GAN.data[0]), 132 | ('G_L1', self.loss_G_L1.data[0]), 133 | ('D_real', self.loss_D_real.data[0]), 134 | ('D_fake', self.loss_D_fake.data[0]) 135 | ]) 136 | else: 137 | return OrderedDict([ 138 | ('G_L1', self.loss_G_L1.data[0]) 139 | ]) 140 | 141 | def get_current_visuals(self): 142 | real_A_img, real_A_prior = util.tensor2im(self.real_A.data) 143 | fake_B = util.tensor2im(self.fake_B.data) 144 | real_B = util.tensor2im(self.real_B.data) 145 | if self.opt.output_nc == 1: 146 | fake_B_postprocessed = util.postprocess_parsing(fake_B, self.isTrain) 147 | fake_B_color = util.paint_color(fake_B_postprocessed) 148 | real_B_color = util.paint_color(util.postprocess_parsing(real_B, self.isTrain)) 149 | if self.opt.output_nc == 1: 150 | return OrderedDict([ 151 | ('real_A_img', real_A_img), 152 | ('real_A_prior', real_A_prior), 153 | ('fake_B', fake_B), 154 | ('fake_B_postprocessed', fake_B_postprocessed), 155 | ('fake_B_color', fake_B_color), 156 | ('real_B', real_B), 157 | ('real_B_color', real_B_color)] 158 | ) 159 | else: 160 | return OrderedDict([ 161 | ('real_A_img', real_A_img), 162 | ('real_A_prior', real_A_prior), 163 | ('fake_B', fake_B), 164 | ('real_B', real_B)] 165 | ) 166 | 167 | def save(self, label): 168 | self.save_network(self.netG, 'G', label, self.gpu_ids) 169 | if not self.opt.no_gan: 170 | self.save_network(self.netD, 'D', label, self.gpu_ids) 171 | -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | # colour map 6 | label_colours = [(0,0,0) 7 | # 0=background 8 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 9 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 10 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 11 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 12 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 13 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 14 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128) 15 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 16 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 17 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 18 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 19 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 20 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 21 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128) 22 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 23 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 24 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 25 | # 6=bus, 7=car, 8=cat, 
9=chair, 10=cow 26 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 27 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 28 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128) 29 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 30 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 31 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 32 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 33 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 34 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 35 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128) 36 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 37 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 38 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 39 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 40 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 41 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 42 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128) 43 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 44 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 45 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 46 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 47 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 48 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 49 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128) 50 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 51 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 52 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 53 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 54 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 55 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 56 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128) 57 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 58 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 59 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 60 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 61 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 62 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 63 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128) 64 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 65 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 66 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 67 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 68 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 69 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 70 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)] 71 | # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor 72 | 73 | def decode_labels(mask, num_images=1, num_classes=21): 74 | """Decode batch of segmentation masks. 75 | 76 | Args: 77 | mask: result of inference after taking argmax. 78 | num_images: number of images to decode from the batch. 79 | num_classes: number of classes to predict (including background). 80 | 81 | Returns: 82 | A batch with num_images RGB images of the same size as the input. 83 | """ 84 | n, h, w, c = mask.shape 85 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' 
% (n, num_images) 86 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) 87 | for i in range(num_images): 88 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i]))) 89 | pixels = img.load() 90 | for j_, j in enumerate(mask[i, :, :, 0]): 91 | for k_, k in enumerate(j): 92 | if k < num_classes: 93 | pixels[k_,j_] = label_colours[k] 94 | outputs[i] = np.array(img) 95 | return outputs 96 | 97 | def prepare_label(input_batch, new_size, num_classes, one_hot=True): 98 | """Resize masks and perform one-hot encoding. 99 | 100 | Args: 101 | input_batch: input tensor of shape [batch_size H W 1]. 102 | new_size: a tensor with new height and width. 103 | num_classes: number of classes to predict (including background). 104 | one_hot: whether perform one-hot encoding. 105 | 106 | Returns: 107 | Outputs a tensor of shape [batch_size h w 21] 108 | with last dimension comprised of 0's and 1's only. 109 | """ 110 | with tf.name_scope('label_encode'): 111 | input_batch = tf.image.resize_nearest_neighbor(input_batch, new_size) # as labels are integer numbers, need to use NN interp. 112 | input_batch = tf.squeeze(input_batch, squeeze_dims=[3]) # reducing the channel dimension. 113 | if one_hot: 114 | input_batch = tf.one_hot(input_batch, depth=num_classes) 115 | return input_batch 116 | 117 | def inv_preprocess(imgs, num_images, img_mean): 118 | """Inverse preprocessing of the batch of images. 119 | Add the mean vector and convert from BGR to RGB. 120 | 121 | Args: 122 | imgs: batch of input images. 123 | num_images: number of images to apply the inverse transformations on. 124 | img_mean: vector of mean colour values. 125 | 126 | Returns: 127 | The batch of the size num_images with the same spatial dimensions as the input. 128 | """ 129 | n, h, w, c = imgs.shape 130 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images) 131 | outputs = np.zeros((num_images, h, w, c), dtype=np.uint8) 132 | for i in range(num_images): 133 | outputs[i] = (imgs[i] + img_mean)[:, :, ::-1].astype(np.uint8) 134 | return outputs 135 | -------------------------------------------------------------------------------- /parsing_network/deeplab_resnet/image_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | def image_scaling(img, label): 7 | """ 8 | Randomly scales the images between 0.5 to 1.5 times the original size. 9 | 10 | Args: 11 | img: Training image to scale. 12 | label: Segmentation mask to scale. 13 | """ 14 | 15 | scale = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None) 16 | h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale)) 17 | w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale)) 18 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1]) 19 | img = tf.image.resize_images(img, new_shape) 20 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 21 | label = tf.squeeze(label, squeeze_dims=[0]) 22 | 23 | return img, label 24 | 25 | def image_mirroring(img, label): 26 | """ 27 | Randomly mirrors the images. 28 | 29 | Args: 30 | img: Training image to mirror. 31 | label: Segmentation mask to mirror. 
32 | """ 33 | 34 | distort_left_right_random = tf.random_uniform([1], 0, 1.0, dtype=tf.float32)[0] 35 | mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5) 36 | mirror = tf.boolean_mask([0, 1, 2], mirror) 37 | img = tf.reverse(img, mirror) 38 | label = tf.reverse(label, mirror) 39 | return img, label 40 | 41 | def random_crop_and_pad_image_and_labels(image, label, crop_h, crop_w, ignore_label=255): 42 | """ 43 | Randomly crop and pads the input images. 44 | 45 | Args: 46 | image: Training image to crop/ pad. 47 | label: Segmentation mask to crop/ pad. 48 | crop_h: Height of cropped segment. 49 | crop_w: Width of cropped segment. 50 | ignore_label: Label to ignore during the training. 51 | """ 52 | 53 | label = tf.cast(label, dtype=tf.float32) 54 | label = label - ignore_label # Needs to be subtracted and later added due to 0 padding. 55 | combined = tf.concat(axis=2, values=[image, label]) 56 | image_shape = tf.shape(image) 57 | combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0, tf.maximum(crop_h, image_shape[0]), tf.maximum(crop_w, image_shape[1])) 58 | 59 | last_image_dim = tf.shape(image)[-1] 60 | last_label_dim = tf.shape(label)[-1] 61 | combined_crop = tf.random_crop(combined_pad, [crop_h,crop_w,4]) 62 | img_crop = combined_crop[:, :, :last_image_dim] 63 | label_crop = combined_crop[:, :, last_image_dim:] 64 | label_crop = label_crop + ignore_label 65 | label_crop = tf.cast(label_crop, dtype=tf.uint8) 66 | 67 | # Set static shape so that tensorflow knows shape at compile time. 68 | img_crop.set_shape((crop_h, crop_w, 3)) 69 | label_crop.set_shape((crop_h,crop_w, 1)) 70 | return img_crop, label_crop 71 | 72 | def read_labeled_image_list(data_dir, data_list): 73 | """Reads txt file containing paths to images and ground truth masks. 74 | 75 | Args: 76 | data_dir: path to the directory with images and masks. 77 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 78 | 79 | Returns: 80 | Two lists with all file names for images and masks, respectively. 81 | """ 82 | f = open(data_list, 'r') 83 | images = [] 84 | masks = [] 85 | for line in f: 86 | try: 87 | image, mask = line.strip("\n").split(' ') 88 | except ValueError: # Adhoc for test. 89 | image = mask = line.strip("\n") 90 | images.append(data_dir + image) 91 | masks.append(data_dir + mask) 92 | return images, masks 93 | 94 | def read_images_from_disk(input_queue, input_size, random_scale, random_mirror, ignore_label, img_mean): # optional pre-processing arguments 95 | """Read one image and its corresponding mask with optional pre-processing. 96 | 97 | Args: 98 | input_queue: tf queue with paths to the image and its mask. 99 | input_size: a tuple with (height, width) values. 100 | If not given, return images of original size. 101 | random_scale: whether to randomly scale the images prior 102 | to random crop. 103 | random_mirror: whether to randomly mirror the images prior 104 | to random crop. 105 | ignore_label: index of label to ignore during the training. 106 | img_mean: vector of mean colour values. 107 | 108 | Returns: 109 | Two tensors: the decoded image and its mask. 110 | """ 111 | 112 | img_contents = tf.read_file(input_queue[0]) 113 | label_contents = tf.read_file(input_queue[1]) 114 | 115 | img = tf.image.decode_jpeg(img_contents, channels=3) 116 | img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img) 117 | img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32) 118 | # Extract mean. 
119 | img -= img_mean 120 | 121 | label = tf.image.decode_png(label_contents, channels=1) 122 | 123 | if input_size is not None: 124 | h, w = input_size 125 | 126 | # Randomly scale the images and labels. 127 | if random_scale: 128 | img, label = image_scaling(img, label) 129 | 130 | # Randomly mirror the images and labels. 131 | if random_mirror: 132 | img, label = image_mirroring(img, label) 133 | 134 | # Randomly crops the images and labels. 135 | img, label = random_crop_and_pad_image_and_labels(img, label, h, w, ignore_label) 136 | 137 | return img, label 138 | 139 | class ImageReader(object): 140 | '''Generic ImageReader which reads images and corresponding segmentation 141 | masks from the disk, and enqueues them into a TensorFlow queue. 142 | ''' 143 | 144 | def __init__(self, data_dir, data_list, input_size, 145 | random_scale, random_mirror, ignore_label, img_mean, coord): 146 | '''Initialise an ImageReader. 147 | 148 | Args: 149 | data_dir: path to the directory with images and masks. 150 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 151 | input_size: a tuple with (height, width) values, to which all the images will be resized. 152 | random_scale: whether to randomly scale the images prior to random crop. 153 | random_mirror: whether to randomly mirror the images prior to random crop. 154 | ignore_label: index of label to ignore during the training. 155 | img_mean: vector of mean colour values. 156 | coord: TensorFlow queue coordinator. 157 | ''' 158 | self.data_dir = data_dir 159 | self.data_list = data_list 160 | self.input_size = input_size 161 | self.coord = coord 162 | 163 | self.image_list, self.label_list = read_labeled_image_list(self.data_dir, self.data_list) 164 | self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string) 165 | self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string) 166 | self.queue = tf.train.slice_input_producer([self.images, self.labels], 167 | shuffle=input_size is not None) # not shuffling if it is val 168 | self.image, self.label = read_images_from_disk(self.queue, self.input_size, random_scale, random_mirror, ignore_label, img_mean) 169 | 170 | def dequeue(self, num_elements): 171 | '''Pack images and labels into a batch. 172 | 173 | Args: 174 | num_elements: the batch size. 175 | 176 | Returns: 177 | Two tensors of size (batch_size, h, w, {3, 1}) for images and masks.''' 178 | image_batch, label_batch = tf.train.batch([self.image, self.label], 179 | num_elements) 180 | return image_batch, label_batch 181 | -------------------------------------------------------------------------------- /parsing_network/kaffe/tensorflow/transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..errors import KaffeError, print_stderr 4 | from ..graph import GraphBuilder, NodeMapper 5 | from ..layers import NodeKind 6 | from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser, 7 | BatchNormScaleBiasFuser, BatchNormPreprocessor, ParameterNamer) 8 | 9 | from . import network 10 | 11 | 12 | def get_padding_type(kernel_params, input_shape, output_shape): 13 | '''Translates Caffe's numeric padding to one of ('SAME', 'VALID'). 14 | Caffe supports arbitrary padding values, while TensorFlow only 15 | supports 'SAME' and 'VALID' modes. So, not all Caffe paddings 16 | can be translated to TensorFlow. There are some subtleties to 17 | how the padding edge-cases are handled. 
These are described here: 18 | https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto 19 | ''' 20 | k_h, k_w, s_h, s_w, p_h, p_w = kernel_params 21 | s_o_h = np.ceil(input_shape.height / float(s_h)) 22 | s_o_w = np.ceil(input_shape.width / float(s_w)) 23 | if (output_shape.height == s_o_h) and (output_shape.width == s_o_w): 24 | return 'SAME' 25 | v_o_h = np.ceil((input_shape.height - k_h + 1.0) / float(s_h)) 26 | v_o_w = np.ceil((input_shape.width - k_w + 1.0) / float(s_w)) 27 | if (output_shape.height == v_o_h) and (output_shape.width == v_o_w): 28 | return 'VALID' 29 | return None 30 | 31 | 32 | class TensorFlowNode(object): 33 | '''An intermediate representation for TensorFlow operations.''' 34 | 35 | def __init__(self, op, *args, **kwargs): 36 | # A string corresponding to the TensorFlow operation 37 | self.op = op 38 | # Positional arguments for the operation 39 | self.args = args 40 | # Keyword arguments for the operation 41 | self.kwargs = list(kwargs.items()) 42 | # The source Caffe node 43 | self.node = None 44 | 45 | def format(self, arg): 46 | '''Returns a string representation for the given value.''' 47 | return "'%s'" % arg if isinstance(arg, basestring) else str(arg) 48 | 49 | def pair(self, key, value): 50 | '''Returns key=formatted(value).''' 51 | return '%s=%s' % (key, self.format(value)) 52 | 53 | def emit(self): 54 | '''Emits the Python source for this node.''' 55 | # Format positional arguments 56 | args = map(self.format, self.args) 57 | # Format any keyword arguments 58 | if self.kwargs: 59 | args += [self.pair(k, v) for k, v in self.kwargs] 60 | # Set the node name 61 | args.append(self.pair('name', self.node.name)) 62 | args = ', '.join(args) 63 | return '%s(%s)' % (self.op, args) 64 | 65 | 66 | class MaybeActivated(object): 67 | 68 | def __init__(self, node, default=True): 69 | self.inject_kwargs = {} 70 | if node.metadata.get('relu', False) != default: 71 | self.inject_kwargs['relu'] = not default 72 | 73 | def __call__(self, *args, **kwargs): 74 | kwargs.update(self.inject_kwargs) 75 | return TensorFlowNode(*args, **kwargs) 76 | 77 | 78 | class TensorFlowMapper(NodeMapper): 79 | 80 | def get_kernel_params(self, node): 81 | kernel_params = node.layer.kernel_parameters 82 | input_shape = node.get_only_parent().output_shape 83 | padding = get_padding_type(kernel_params, input_shape, node.output_shape) 84 | # Only emit the padding if it's not the default value. 85 | padding = {'padding': padding} if padding != network.DEFAULT_PADDING else {} 86 | return (kernel_params, padding) 87 | 88 | def map_convolution(self, node): 89 | (kernel_params, kwargs) = self.get_kernel_params(node) 90 | h = kernel_params.kernel_h 91 | w = kernel_params.kernel_w 92 | c_o = node.output_shape[1] 93 | c_i = node.parents[0].output_shape[1] 94 | group = node.parameters.group 95 | if group != 1: 96 | kwargs['group'] = group 97 | if not node.parameters.bias_term: 98 | kwargs['biased'] = False 99 | assert kernel_params.kernel_h == h 100 | assert kernel_params.kernel_w == w 101 | return MaybeActivated(node)('conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o, 102 | kernel_params.stride_h, kernel_params.stride_w, **kwargs) 103 | 104 | def map_relu(self, node): 105 | return TensorFlowNode('relu') 106 | 107 | def map_pooling(self, node): 108 | pool_type = node.parameters.pool 109 | if pool_type == 0: 110 | pool_op = 'max_pool' 111 | elif pool_type == 1: 112 | pool_op = 'avg_pool' 113 | else: 114 | # Stochastic pooling, for instance. 
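# Worked example for get_padding_type at the top of this file (standalone sketch with
# assumed numbers): a 3x3 kernel with stride 1 keeps a 224x224 input at 224x224 when
# Caffe pads by 1, matching TensorFlow's 'SAME'; with no padding the output is 222x222,
# matching 'VALID'. Other Caffe paddings have no TensorFlow equivalent and map to None.
def caffe_conv_output(size, k, s, p):
    # Caffe's spatial output-size formula for convolution layers.
    return (size + 2 * p - k) // s + 1

assert caffe_conv_output(224, k=3, s=1, p=1) == 224   # == ceil(224 / 1)        -> 'SAME'
assert caffe_conv_output(224, k=3, s=1, p=0) == 222   # == ceil((224 - 3 + 1))  -> 'VALID'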
115 | raise KaffeError('Unsupported pooling type.') 116 | (kernel_params, padding) = self.get_kernel_params(node) 117 | return TensorFlowNode(pool_op, kernel_params.kernel_h, kernel_params.kernel_w, 118 | kernel_params.stride_h, kernel_params.stride_w, **padding) 119 | 120 | def map_inner_product(self, node): 121 | #TODO: Axis 122 | assert node.parameters.axis == 1 123 | #TODO: Unbiased 124 | assert node.parameters.bias_term == True 125 | return MaybeActivated(node)('fc', node.parameters.num_output) 126 | 127 | def map_softmax(self, node): 128 | return TensorFlowNode('softmax') 129 | 130 | def map_lrn(self, node): 131 | params = node.parameters 132 | # The window size must be an odd value. For a window 133 | # size of (2*n+1), TensorFlow defines depth_radius = n. 134 | assert params.local_size % 2 == 1 135 | # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow 136 | # just scales by alpha (as does Krizhevsky's paper). 137 | # We'll account for that here. 138 | alpha = params.alpha / float(params.local_size) 139 | return TensorFlowNode('lrn', int(params.local_size / 2), alpha, params.beta) 140 | 141 | def map_concat(self, node): 142 | axis = (2, 3, 1, 0)[node.parameters.axis] 143 | return TensorFlowNode('concat', axis) 144 | 145 | def map_dropout(self, node): 146 | return TensorFlowNode('dropout', node.parameters.dropout_ratio) 147 | 148 | def map_batch_norm(self, node): 149 | scale_offset = len(node.data) == 4 150 | kwargs = {'is_training': True} if scale_offset else {'is_training': True, 'scale': False} 151 | return MaybeActivated(node, default=False)('batch_normalization', **kwargs) 152 | 153 | def map_eltwise(self, node): 154 | operations = {0: 'multiply', 1: 'add', 2: 'max'} 155 | op_code = node.parameters.operation 156 | try: 157 | return TensorFlowNode(operations[op_code]) 158 | except KeyError: 159 | raise KaffeError('Unknown elementwise operation: {}'.format(op_code)) 160 | 161 | def commit(self, chains): 162 | return chains 163 | 164 | 165 | class TensorFlowEmitter(object): 166 | 167 | def __init__(self, tab=None): 168 | self.tab = tab or ' ' * 4 169 | self.prefix = '' 170 | 171 | def indent(self): 172 | self.prefix += self.tab 173 | 174 | def outdent(self): 175 | self.prefix = self.prefix[:-len(self.tab)] 176 | 177 | def statement(self, s): 178 | return self.prefix + s + '\n' 179 | 180 | def emit_imports(self): 181 | return self.statement('from kaffe.tensorflow import Network\n') 182 | 183 | def emit_class_def(self, name): 184 | return self.statement('class %s(Network):' % (name)) 185 | 186 | def emit_setup_def(self): 187 | return self.statement('def setup(self):') 188 | 189 | def emit_parents(self, chain): 190 | assert len(chain) 191 | s = '(self.feed(' 192 | sep = ', \n' + self.prefix + (' ' * len(s)) 193 | s += sep.join(["'%s'" % parent.name for parent in chain[0].node.parents]) 194 | return self.statement(s + ')') 195 | 196 | def emit_node(self, node): 197 | return self.statement(' ' * 5 + '.' 
+ node.emit()) 198 | 199 | def emit(self, name, chains): 200 | s = self.emit_imports() 201 | s += self.emit_class_def(name) 202 | self.indent() 203 | s += self.emit_setup_def() 204 | self.indent() 205 | blocks = [] 206 | for chain in chains: 207 | b = '' 208 | b += self.emit_parents(chain) 209 | for node in chain: 210 | b += self.emit_node(node) 211 | blocks.append(b[:-1] + ')') 212 | s = s + '\n\n'.join(blocks) 213 | return s 214 | 215 | 216 | class TensorFlowTransformer(object): 217 | 218 | def __init__(self, def_path, data_path, verbose=True, phase='test'): 219 | self.verbose = verbose 220 | self.phase = phase 221 | self.load(def_path, data_path, phase) 222 | self.params = None 223 | self.source = None 224 | 225 | def load(self, def_path, data_path, phase): 226 | # Build the graph 227 | graph = GraphBuilder(def_path, phase).build() 228 | 229 | if data_path is not None: 230 | # Load and associate learned parameters 231 | graph = DataInjector(def_path, data_path)(graph) 232 | 233 | # Transform the graph 234 | transformers = [ 235 | # Fuse split batch normalization layers 236 | BatchNormScaleBiasFuser(), 237 | 238 | # Fuse ReLUs 239 | # TODO: Move non-linearity application to layer wrapper, allowing 240 | # any arbitrary operation to be optionally activated. 241 | ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct, 242 | NodeKind.BatchNorm]), 243 | 244 | # Rename nodes 245 | # Slashes are used for scoping in TensorFlow. Replace slashes 246 | # in node names with underscores. 247 | # (Caffe's GoogLeNet implementation uses slashes) 248 | NodeRenamer(lambda node: node.name.replace('/', '_')) 249 | ] 250 | self.graph = graph.transformed(transformers) 251 | 252 | # Display the graph 253 | if self.verbose: 254 | print_stderr(self.graph) 255 | 256 | def transform_data(self): 257 | if self.params is None: 258 | transformers = [ 259 | 260 | # Reshape the parameters to TensorFlow's ordering 261 | DataReshaper({ 262 | # (c_o, c_i, h, w) -> (h, w, c_i, c_o) 263 | NodeKind.Convolution: (2, 3, 1, 0), 264 | 265 | # (c_o, c_i) -> (c_i, c_o) 266 | NodeKind.InnerProduct: (1, 0) 267 | }), 268 | 269 | # Pre-process batch normalization data 270 | BatchNormPreprocessor(), 271 | 272 | # Convert parameters to dictionaries 273 | ParameterNamer(), 274 | ] 275 | self.graph = self.graph.transformed(transformers) 276 | self.params = {node.name: node.data for node in self.graph.nodes if node.data} 277 | return self.params 278 | 279 | def transform_source(self): 280 | if self.source is None: 281 | mapper = TensorFlowMapper(self.graph) 282 | chains = mapper.map() 283 | emitter = TensorFlowEmitter() 284 | self.source = emitter.emit(self.graph.name, chains) 285 | return self.source 286 | -------------------------------------------------------------------------------- /parsing_network/kaffe/transformers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A collection of graph transforms. 3 | 4 | A transformer is a callable that accepts a graph and returns a transformed version. 5 | ''' 6 | 7 | import numpy as np 8 | 9 | from .caffe import get_caffe_resolver, has_pycaffe 10 | from .errors import KaffeError, print_stderr 11 | from .layers import NodeKind 12 | 13 | 14 | class DataInjector(object): 15 | ''' 16 | Associates parameters loaded from a .caffemodel file with their corresponding nodes. 
17 | ''' 18 | 19 | def __init__(self, def_path, data_path): 20 | # The .prototxt file defining the graph 21 | self.def_path = def_path 22 | # The .caffemodel file containing the learned parameters 23 | self.data_path = data_path 24 | # Set to true if the fallback protocol-buffer based backend was used 25 | self.did_use_pb = False 26 | # A list containing (layer name, parameters) tuples 27 | self.params = None 28 | # Load the parameters 29 | self.load() 30 | 31 | def load(self): 32 | if has_pycaffe(): 33 | self.load_using_caffe() 34 | else: 35 | self.load_using_pb() 36 | 37 | def load_using_caffe(self): 38 | caffe = get_caffe_resolver().caffe 39 | net = caffe.Net(self.def_path, self.data_path, caffe.TEST) 40 | data = lambda blob: blob.data 41 | self.params = [(k, map(data, v)) for k, v in net.params.items()] 42 | 43 | def load_using_pb(self): 44 | data = get_caffe_resolver().NetParameter() 45 | data.MergeFromString(open(self.data_path, 'rb').read()) 46 | pair = lambda layer: (layer.name, self.normalize_pb_data(layer)) 47 | layers = data.layers or data.layer 48 | self.params = [pair(layer) for layer in layers if layer.blobs] 49 | self.did_use_pb = True 50 | 51 | def normalize_pb_data(self, layer): 52 | transformed = [] 53 | for blob in layer.blobs: 54 | if len(blob.shape.dim): 55 | dims = blob.shape.dim 56 | c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims)) 57 | else: 58 | c_o = blob.num 59 | c_i = blob.channels 60 | h = blob.height 61 | w = blob.width 62 | data = np.array(blob.data, dtype=np.float32).reshape(c_o, c_i, h, w) 63 | transformed.append(data) 64 | return transformed 65 | 66 | def adjust_parameters(self, node, data): 67 | if not self.did_use_pb: 68 | return data 69 | # When using the protobuf-backend, each parameter initially has four dimensions. 70 | # In certain cases (like FC layers), we want to eliminate the singleton dimensions. 71 | # This implementation takes care of the common cases. However, it does leave the 72 | # potential for future issues. 73 | # The Caffe-backend does not suffer from this problem. 74 | data = list(data) 75 | squeeze_indices = [1] # Squeeze biases. 76 | if node.kind == NodeKind.InnerProduct: 77 | squeeze_indices.append(0) # Squeeze FC. 78 | for idx in squeeze_indices: 79 | data[idx] = np.squeeze(data[idx]) 80 | return data 81 | 82 | def __call__(self, graph): 83 | for layer_name, data in self.params: 84 | if layer_name in graph: 85 | node = graph.get_node(layer_name) 86 | node.data = self.adjust_parameters(node, data) 87 | else: 88 | print_stderr('Ignoring parameters for non-existent layer: %s' % layer_name) 89 | return graph 90 | 91 | 92 | class DataReshaper(object): 93 | 94 | def __init__(self, mapping, replace=True): 95 | # A dictionary mapping NodeKind to the transposed order. 96 | self.mapping = mapping 97 | # The node kinds eligible for reshaping 98 | self.reshaped_node_types = self.mapping.keys() 99 | # If true, the reshaped data will replace the old one. 100 | # Otherwise, it's set to the reshaped_data attribute. 
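# Minimal sketch of the reordering DataReshaper performs (assumed shapes, not part of
# transformers.py): Caffe stores convolution weights as (c_o, c_i, h, w) while the
# generated TensorFlow graph expects (h, w, c_i, c_o), so the Convolution mapping
# (2, 3, 1, 0) used by the converter is a plain transpose; fully connected weights go
# from (c_o, c_i) to (c_i, c_o) with (1, 0).
import numpy as np

conv_caffe = np.zeros((64, 3, 7, 7), dtype=np.float32)    # (c_o, c_i, h, w)
conv_tf = conv_caffe.transpose((2, 3, 1, 0))              # -> (h, w, c_i, c_o)
assert conv_tf.shape == (7, 7, 3, 64)

fc_caffe = np.zeros((1000, 2048), dtype=np.float32)       # (c_o, c_i)
fc_tf = fc_caffe.transpose((1, 0))                        # -> (c_i, c_o)
assert fc_tf.shape == (2048, 1000)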
101 | self.replace = replace 102 | 103 | def has_spatial_parent(self, node): 104 | try: 105 | parent = node.get_only_parent() 106 | s = parent.output_shape 107 | return s.height > 1 or s.width > 1 108 | except KaffeError: 109 | return False 110 | 111 | def map(self, node_kind): 112 | try: 113 | return self.mapping[node_kind] 114 | except KeyError: 115 | raise KaffeError('Ordering not found for node kind: {}'.format(node_kind)) 116 | 117 | def __call__(self, graph): 118 | for node in graph.nodes: 119 | if node.data is None: 120 | continue 121 | if node.kind not in self.reshaped_node_types: 122 | # Check for 2+ dimensional data 123 | if any(len(tensor.shape) > 1 for tensor in node.data): 124 | print_stderr('Warning: parmaters not reshaped for node: {}'.format(node)) 125 | continue 126 | transpose_order = self.map(node.kind) 127 | weights = node.data[0] 128 | if (node.kind == NodeKind.InnerProduct) and self.has_spatial_parent(node): 129 | # The FC layer connected to the spatial layer needs to be 130 | # re-wired to match the new spatial ordering. 131 | in_shape = node.get_only_parent().output_shape 132 | fc_shape = weights.shape 133 | output_channels = fc_shape[0] 134 | weights = weights.reshape((output_channels, in_shape.channels, in_shape.height, 135 | in_shape.width)) 136 | weights = weights.transpose(self.map(NodeKind.Convolution)) 137 | node.reshaped_data = weights.reshape(fc_shape[transpose_order[0]], 138 | fc_shape[transpose_order[1]]) 139 | else: 140 | node.reshaped_data = weights.transpose(transpose_order) 141 | 142 | if self.replace: 143 | for node in graph.nodes: 144 | if hasattr(node, 'reshaped_data'): 145 | # Set the weights 146 | node.data[0] = node.reshaped_data 147 | del node.reshaped_data 148 | return graph 149 | 150 | 151 | class SubNodeFuser(object): 152 | ''' 153 | An abstract helper for merging a single-child with its single-parent. 154 | ''' 155 | 156 | def __call__(self, graph): 157 | nodes = graph.nodes 158 | fused_nodes = [] 159 | for node in nodes: 160 | if len(node.parents) != 1: 161 | # We're only fusing nodes with single parents 162 | continue 163 | parent = node.get_only_parent() 164 | if len(parent.children) != 1: 165 | # We can only fuse a node if its parent's 166 | # value isn't used by any other node. 167 | continue 168 | if not self.is_eligible_pair(parent, node): 169 | continue 170 | # Rewrite the fused node's children to its parent. 171 | for child in node.children: 172 | child.parents.remove(node) 173 | parent.add_child(child) 174 | # Disconnect the fused node from the graph. 175 | parent.children.remove(node) 176 | fused_nodes.append(node) 177 | # Let the sub-class merge the fused node in any arbitrary way. 178 | self.merge(parent, node) 179 | transformed_nodes = [node for node in nodes if node not in fused_nodes] 180 | return graph.replaced(transformed_nodes) 181 | 182 | def is_eligible_pair(self, parent, child): 183 | '''Returns true if this parent/child pair is eligible for fusion.''' 184 | raise NotImplementedError('Must be implemented by subclass.') 185 | 186 | def merge(self, parent, child): 187 | '''Merge the child node into the parent.''' 188 | raise NotImplementedError('Must be implemented by subclass') 189 | 190 | 191 | class ReLUFuser(SubNodeFuser): 192 | ''' 193 | Fuses rectified linear units with their parent nodes. 194 | ''' 195 | 196 | def __init__(self, allowed_parent_types=None): 197 | # Fuse ReLUs when the parent node is one of the given types. 198 | # If None, all node types are eligible. 
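# Net effect of this fuser, sketched with hypothetical layer names: a Caffe pair such as
#     conv1 (Convolution) -> relu1 (ReLU, in-place)
# is collapsed into the single parent node with metadata['relu'] = True, so the
# TensorFlow emitter later writes one chained call,
#     .conv(3, 3, 64, 1, 1, name='conv1')     # relu=True is the wrapper's default
# instead of a separate .relu(name='relu1') step.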
199 | self.allowed_parent_types = allowed_parent_types 200 | 201 | def is_eligible_pair(self, parent, child): 202 | return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and 203 | child.kind == NodeKind.ReLU) 204 | 205 | def merge(self, parent, _): 206 | parent.metadata['relu'] = True 207 | 208 | 209 | class BatchNormScaleBiasFuser(SubNodeFuser): 210 | ''' 211 | The original batch normalization paper includes two learned 212 | parameters: a scaling factor \gamma and a bias \beta. 213 | Caffe's implementation does not include these two. However, it is commonly 214 | replicated by adding a scaling+bias layer immidiately after the batch norm. 215 | 216 | This fuser merges the scaling+bias layer with the batch norm. 217 | ''' 218 | 219 | def is_eligible_pair(self, parent, child): 220 | return (parent.kind == NodeKind.BatchNorm and child.kind == NodeKind.Scale and 221 | child.parameters.axis == 1 and child.parameters.bias_term == True) 222 | 223 | def merge(self, parent, child): 224 | parent.scale_bias_node = child 225 | 226 | 227 | class BatchNormPreprocessor(object): 228 | ''' 229 | Prescale batch normalization parameters. 230 | Concatenate gamma (scale) and beta (bias) terms if set. 231 | ''' 232 | 233 | def __call__(self, graph): 234 | for node in graph.nodes: 235 | if node.kind != NodeKind.BatchNorm: 236 | continue 237 | assert node.data is not None 238 | assert len(node.data) == 3 239 | mean, variance, scale = node.data 240 | # Prescale the stats 241 | scaling_factor = 1.0 / scale if scale != 0 else 0 242 | mean *= scaling_factor 243 | variance *= scaling_factor 244 | # Replace with the updated values 245 | node.data = [mean, variance] 246 | if hasattr(node, 'scale_bias_node'): 247 | # Include the scale and bias terms 248 | gamma, beta = node.scale_bias_node.data 249 | node.data += [gamma, beta] 250 | return graph 251 | 252 | 253 | class NodeRenamer(object): 254 | ''' 255 | Renames nodes in the graph using a given unary function that 256 | accepts a node and returns its new name. 257 | ''' 258 | 259 | def __init__(self, renamer): 260 | self.renamer = renamer 261 | 262 | def __call__(self, graph): 263 | for node in graph.nodes: 264 | node.name = self.renamer(node) 265 | return graph 266 | 267 | 268 | class ParameterNamer(object): 269 | ''' 270 | Convert layer data arrays to a dictionary mapping parameter names to their values. 271 | ''' 272 | 273 | def __call__(self, graph): 274 | for node in graph.nodes: 275 | if node.data is None: 276 | continue 277 | if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct): 278 | names = ('weights',) 279 | if node.parameters.bias_term: 280 | names += ('biases',) 281 | elif node.kind == NodeKind.BatchNorm: 282 | names = ('moving_mean', 'moving_variance') 283 | if len(node.data) == 4: 284 | names += ('gamma', 'beta') 285 | else: 286 | print_stderr('WARNING: Unhandled parameters: {}'.format(node.kind)) 287 | continue 288 | assert len(names) == len(node.data) 289 | node.data = dict(zip(names, node.data)) 290 | return graph 291 | -------------------------------------------------------------------------------- /parsing_network/kaffe/tensorflow/network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | slim = tf.contrib.slim 4 | 5 | DEFAULT_PADDING = 'SAME' 6 | 7 | 8 | def layer(op): 9 | '''Decorator for composable network layers.''' 10 | 11 | def layer_decorated(self, *args, **kwargs): 12 | # Automatically set a name if not provided. 
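# Usage sketch for the composable-layer API defined in this file (a hypothetical toy
# subclass, not one of the repo's generated networks): every @layer-decorated method
# consumes the current terminals set by feed() and returns self, so a model is written
# as one fluent chain, in the same style the kaffe emitter generates.
#
#     class ToyNet(Network):
#         def setup(self, is_training, num_classes):
#             (self.feed('data')
#                  .conv(7, 7, 64, 2, 2, name='conv1')
#                  .max_pool(3, 3, 2, 2, name='pool1')
#                  .fc(num_classes, relu=False, name='fc_out')
#                  .softmax(name='prob'))
#
#     data = tf.placeholder(tf.float32, shape=[1, 224, 224, 3], name='data')
#     net = ToyNet({'data': data}, is_training=False, num_classes=21)
#     prob = net.get_output()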
13 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 14 | # Figure out the layer inputs. 15 | if len(self.terminals) == 0: 16 | raise RuntimeError('No input variables found for layer %s.' % name) 17 | elif len(self.terminals) == 1: 18 | layer_input = self.terminals[0] 19 | else: 20 | layer_input = list(self.terminals) 21 | # Perform the operation and get the output. 22 | layer_output = op(self, layer_input, *args, **kwargs) 23 | # Add to layer LUT. 24 | self.layers[name] = layer_output 25 | # This output is now the input for the next layer. 26 | self.feed(layer_output) 27 | # Return self for chained calls. 28 | return self 29 | 30 | return layer_decorated 31 | 32 | 33 | class Network(object): 34 | 35 | def __init__(self, inputs, trainable=True, is_training=False, num_classes=21): 36 | # The input nodes for this network 37 | self.inputs = inputs 38 | # The current list of terminal nodes 39 | self.terminals = [] 40 | # Mapping from layer names to layers 41 | self.layers = dict(inputs) 42 | # If true, the resulting variables are set as trainable 43 | self.trainable = trainable 44 | # Switch variable for dropout 45 | self.use_dropout = tf.placeholder_with_default(tf.constant(1.0), 46 | shape=[], 47 | name='use_dropout') 48 | self.setup(is_training, num_classes) 49 | 50 | def setup(self, is_training): 51 | '''Construct the network. ''' 52 | raise NotImplementedError('Must be implemented by the subclass.') 53 | 54 | def load(self, data_path, session, ignore_missing=False): 55 | '''Load network weights. 56 | data_path: The path to the numpy-serialized network weights 57 | session: The current TensorFlow session 58 | ignore_missing: If true, serialized weights for missing layers are ignored. 59 | ''' 60 | data_dict = np.load(data_path).item() 61 | for op_name in data_dict: 62 | with tf.variable_scope(op_name, reuse=True): 63 | for param_name, data in data_dict[op_name].iteritems(): 64 | try: 65 | var = tf.get_variable(param_name) 66 | session.run(var.assign(data)) 67 | except ValueError: 68 | if not ignore_missing: 69 | raise 70 | 71 | def feed(self, *args): 72 | '''Set the input(s) for the next operation by replacing the terminal nodes. 73 | The arguments can be either layer names or the actual layers. 74 | ''' 75 | assert len(args) != 0 76 | self.terminals = [] 77 | for fed_layer in args: 78 | if isinstance(fed_layer, basestring): 79 | try: 80 | fed_layer = self.layers[fed_layer] 81 | except KeyError: 82 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 83 | self.terminals.append(fed_layer) 84 | return self 85 | 86 | def get_output(self): 87 | '''Returns the current network output.''' 88 | return self.terminals[-1] 89 | 90 | def get_unique_name(self, prefix): 91 | '''Returns an index-suffixed unique name for the given prefix. 92 | This is used for auto-generating layer names based on the type-prefix. 
93 | ''' 94 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 95 | return '%s_%d' % (prefix, ident) 96 | 97 | def make_var(self, name, shape): 98 | '''Creates a new TensorFlow variable.''' 99 | return tf.get_variable(name, shape, trainable=self.trainable) 100 | 101 | def validate_padding(self, padding): 102 | '''Verifies that the padding is one of the supported ones.''' 103 | assert padding in ('SAME', 'VALID') 104 | 105 | @layer 106 | def conv(self, 107 | input, 108 | k_h, 109 | k_w, 110 | c_o, 111 | s_h, 112 | s_w, 113 | name, 114 | relu=True, 115 | padding=DEFAULT_PADDING, 116 | group=1, 117 | biased=True): 118 | # Verify that the padding is acceptable 119 | self.validate_padding(padding) 120 | # Get the number of channels in the input 121 | c_i = input.get_shape()[-1] 122 | # Verify that the grouping parameter is valid 123 | assert c_i % group == 0 124 | assert c_o % group == 0 125 | # Convolution for a given input and kernel 126 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 127 | with tf.variable_scope(name) as scope: 128 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o]) 129 | if group == 1: 130 | # This is the common-case. Convolve the input without any further complications. 131 | output = convolve(input, kernel) 132 | else: 133 | # Split the input into groups and then convolve each of them independently 134 | input_groups = tf.split(3, group, input) 135 | kernel_groups = tf.split(3, group, kernel) 136 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 137 | # Concatenate the groups 138 | output = tf.concat(3, output_groups) 139 | # Add the biases 140 | if biased: 141 | biases = self.make_var('biases', [c_o]) 142 | output = tf.nn.bias_add(output, biases) 143 | if relu: 144 | # ReLU non-linearity 145 | output = tf.nn.relu(output, name=scope.name) 146 | return output 147 | 148 | @layer 149 | def atrous_conv(self, 150 | input, 151 | k_h, 152 | k_w, 153 | c_o, 154 | dilation, 155 | name, 156 | relu=True, 157 | padding=DEFAULT_PADDING, 158 | group=1, 159 | biased=True): 160 | # Verify that the padding is acceptable 161 | self.validate_padding(padding) 162 | # Get the number of channels in the input 163 | c_i = input.get_shape()[-1] 164 | # Verify that the grouping parameter is valid 165 | assert c_i % group == 0 166 | assert c_o % group == 0 167 | # Convolution for a given input and kernel 168 | convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding) 169 | with tf.variable_scope(name) as scope: 170 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o]) 171 | if group == 1: 172 | # This is the common-case. Convolve the input without any further complications. 
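# Side note on the atrous_conv wrapper below (illustrative, with assumed rates): a
# k x k kernel applied with dilation d covers an effective (k + (k - 1) * (d - 1))
# window, which is how DeepLab-style dilated layers widen the receptive field without
# further downsampling of the feature maps.
def effective_kernel_extent(k, dilation):
    return k + (k - 1) * (dilation - 1)

assert effective_kernel_extent(3, 1) == 3    # ordinary 3x3 convolution
assert effective_kernel_extent(3, 2) == 5
assert effective_kernel_extent(3, 4) == 9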
173 | output = convolve(input, kernel) 174 | else: 175 | # Split the input into groups and then convolve each of them independently 176 | input_groups = tf.split(3, group, input) 177 | kernel_groups = tf.split(3, group, kernel) 178 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 179 | # Concatenate the groups 180 | output = tf.concat(3, output_groups) 181 | # Add the biases 182 | if biased: 183 | biases = self.make_var('biases', [c_o]) 184 | output = tf.nn.bias_add(output, biases) 185 | if relu: 186 | # ReLU non-linearity 187 | output = tf.nn.relu(output, name=scope.name) 188 | return output 189 | 190 | @layer 191 | def relu(self, input, name): 192 | return tf.nn.relu(input, name=name) 193 | 194 | @layer 195 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 196 | self.validate_padding(padding) 197 | return tf.nn.max_pool(input, 198 | ksize=[1, k_h, k_w, 1], 199 | strides=[1, s_h, s_w, 1], 200 | padding=padding, 201 | name=name) 202 | 203 | @layer 204 | def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 205 | self.validate_padding(padding) 206 | return tf.nn.avg_pool(input, 207 | ksize=[1, k_h, k_w, 1], 208 | strides=[1, s_h, s_w, 1], 209 | padding=padding, 210 | name=name) 211 | 212 | @layer 213 | def lrn(self, input, radius, alpha, beta, name, bias=1.0): 214 | return tf.nn.local_response_normalization(input, 215 | depth_radius=radius, 216 | alpha=alpha, 217 | beta=beta, 218 | bias=bias, 219 | name=name) 220 | 221 | @layer 222 | def concat(self, inputs, axis, name): 223 | return tf.concat(concat_dim=axis, values=inputs, name=name) 224 | 225 | @layer 226 | def add(self, inputs, name): 227 | return tf.add_n(inputs, name=name) 228 | 229 | @layer 230 | def fc(self, input, num_out, name, relu=True): 231 | with tf.variable_scope(name) as scope: 232 | input_shape = input.get_shape() 233 | if input_shape.ndims == 4: 234 | # The input is spatial. Vectorize it first. 235 | dim = 1 236 | for d in input_shape[1:].as_list(): 237 | dim *= d 238 | feed_in = tf.reshape(input, [-1, dim]) 239 | else: 240 | feed_in, dim = (input, input_shape[-1].value) 241 | weights = self.make_var('weights', shape=[dim, num_out]) 242 | biases = self.make_var('biases', [num_out]) 243 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 244 | fc = op(feed_in, weights, biases, name=scope.name) 245 | return fc 246 | 247 | @layer 248 | def softmax(self, input, name): 249 | input_shape = map(lambda v: v.value, input.get_shape()) 250 | if len(input_shape) > 2: 251 | # For certain models (like NiN), the singleton spatial dimensions 252 | # need to be explicitly squeezed, since they're not broadcast-able 253 | # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). 
254 | if input_shape[1] == 1 and input_shape[2] == 1: 255 | input = tf.squeeze(input, squeeze_dims=[1, 2]) 256 | else: 257 | raise ValueError('Rank 2 tensor input expected for softmax!') 258 | return tf.nn.softmax(input, name) 259 | 260 | @layer 261 | def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True): 262 | with tf.variable_scope(name) as scope: 263 | output = slim.batch_norm( 264 | input, 265 | activation_fn=activation_fn, 266 | is_training=is_training, 267 | updates_collections=None, 268 | scale=scale, 269 | scope=scope) 270 | return output 271 | 272 | @layer 273 | def dropout(self, input, keep_prob, name): 274 | keep = 1 - self.use_dropout + (self.use_dropout * keep_prob) 275 | return tf.nn.dropout(input, keep, name=name) 276 | -------------------------------------------------------------------------------- /parsing_network/kaffe/graph.py: -------------------------------------------------------------------------------- 1 | from google.protobuf import text_format 2 | 3 | from .caffe import get_caffe_resolver 4 | from .errors import KaffeError, print_stderr 5 | from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch 6 | from .shapes import TensorShape 7 | 8 | class Node(object): 9 | 10 | def __init__(self, name, kind, layer=None): 11 | self.name = name 12 | self.kind = kind 13 | self.layer = LayerAdapter(layer, kind) if layer else None 14 | self.parents = [] 15 | self.children = [] 16 | self.data = None 17 | self.output_shape = None 18 | self.metadata = {} 19 | 20 | def add_parent(self, parent_node): 21 | assert parent_node not in self.parents 22 | self.parents.append(parent_node) 23 | if self not in parent_node.children: 24 | parent_node.children.append(self) 25 | 26 | def add_child(self, child_node): 27 | assert child_node not in self.children 28 | self.children.append(child_node) 29 | if self not in child_node.parents: 30 | child_node.parents.append(self) 31 | 32 | def get_only_parent(self): 33 | if len(self.parents) != 1: 34 | raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' 
% 35 | (self, len(self.parents))) 36 | return self.parents[0] 37 | 38 | @property 39 | def parameters(self): 40 | if self.layer is not None: 41 | return self.layer.parameters 42 | return None 43 | 44 | def __str__(self): 45 | return '[%s] %s' % (self.kind, self.name) 46 | 47 | def __repr__(self): 48 | return '%s (0x%x)' % (self.name, id(self)) 49 | 50 | 51 | class Graph(object): 52 | 53 | def __init__(self, nodes=None, name=None): 54 | self.nodes = nodes or [] 55 | self.node_lut = {node.name: node for node in self.nodes} 56 | self.name = name 57 | 58 | def add_node(self, node): 59 | self.nodes.append(node) 60 | self.node_lut[node.name] = node 61 | 62 | def get_node(self, name): 63 | try: 64 | return self.node_lut[name] 65 | except KeyError: 66 | raise KaffeError('Layer not found: %s' % name) 67 | 68 | def get_input_nodes(self): 69 | return [node for node in self.nodes if len(node.parents) == 0] 70 | 71 | def get_output_nodes(self): 72 | return [node for node in self.nodes if len(node.children) == 0] 73 | 74 | def topologically_sorted(self): 75 | sorted_nodes = [] 76 | unsorted_nodes = list(self.nodes) 77 | temp_marked = set() 78 | perm_marked = set() 79 | 80 | def visit(node): 81 | if node in temp_marked: 82 | raise KaffeError('Graph is not a DAG.') 83 | if node in perm_marked: 84 | return 85 | temp_marked.add(node) 86 | for child in node.children: 87 | visit(child) 88 | perm_marked.add(node) 89 | temp_marked.remove(node) 90 | sorted_nodes.insert(0, node) 91 | 92 | while len(unsorted_nodes): 93 | visit(unsorted_nodes.pop()) 94 | return sorted_nodes 95 | 96 | def compute_output_shapes(self): 97 | sorted_nodes = self.topologically_sorted() 98 | for node in sorted_nodes: 99 | node.output_shape = TensorShape(*NodeKind.compute_output_shape(node)) 100 | 101 | def replaced(self, new_nodes): 102 | return Graph(nodes=new_nodes, name=self.name) 103 | 104 | def transformed(self, transformers): 105 | graph = self 106 | for transformer in transformers: 107 | graph = transformer(graph) 108 | if graph is None: 109 | raise KaffeError('Transformer failed: {}'.format(transformer)) 110 | assert isinstance(graph, Graph) 111 | return graph 112 | 113 | def __contains__(self, key): 114 | return key in self.node_lut 115 | 116 | def __str__(self): 117 | hdr = '{:<20} {:<30} {:>20} {:>20}'.format('Type', 'Name', 'Param', 'Output') 118 | s = [hdr, '-' * 94] 119 | for node in self.topologically_sorted(): 120 | # If the node has learned parameters, display the first one's shape. 121 | # In case of convolutions, this corresponds to the weights. 122 | data_shape = node.data[0].shape if node.data else '--' 123 | out_shape = node.output_shape or '--' 124 | s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape, 125 | tuple(out_shape))) 126 | return '\n'.join(s) 127 | 128 | 129 | class GraphBuilder(object): 130 | '''Constructs a model graph from a Caffe protocol buffer definition.''' 131 | 132 | def __init__(self, def_path, phase='test'): 133 | ''' 134 | def_path: Path to the model definition (.prototxt) 135 | data_path: Path to the model data (.caffemodel) 136 | phase: Either 'test' or 'train'. Used for filtering phase-specific nodes. 
137 | ''' 138 | self.def_path = def_path 139 | self.phase = phase 140 | self.load() 141 | 142 | def load(self): 143 | '''Load the layer definitions from the prototxt.''' 144 | self.params = get_caffe_resolver().NetParameter() 145 | with open(self.def_path, 'rb') as def_file: 146 | text_format.Merge(def_file.read(), self.params) 147 | 148 | def filter_layers(self, layers): 149 | '''Filter out layers based on the current phase.''' 150 | phase_map = {0: 'train', 1: 'test'} 151 | filtered_layer_names = set() 152 | filtered_layers = [] 153 | for layer in layers: 154 | phase = self.phase 155 | if len(layer.include): 156 | phase = phase_map[layer.include[0].phase] 157 | if len(layer.exclude): 158 | phase = phase_map[1 - layer.include[0].phase] 159 | exclude = (phase != self.phase) 160 | # Dropout layers appear in a fair number of Caffe 161 | # test-time networks. These are just ignored. We'll 162 | # filter them out here. 163 | if (not exclude) and (phase == 'test'): 164 | exclude = (layer.type == LayerType.Dropout) 165 | if not exclude: 166 | filtered_layers.append(layer) 167 | # Guard against dupes. 168 | assert layer.name not in filtered_layer_names 169 | filtered_layer_names.add(layer.name) 170 | return filtered_layers 171 | 172 | def make_node(self, layer): 173 | '''Create a graph node for the given layer.''' 174 | kind = NodeKind.map_raw_kind(layer.type) 175 | if kind is None: 176 | raise KaffeError('Unknown layer type encountered: %s' % layer.type) 177 | # We want to use the layer's top names (the "output" names), rather than the 178 | # name attribute, which is more of readability thing than a functional one. 179 | # Other layers will refer to a node by its "top name". 180 | return Node(layer.name, kind, layer=layer) 181 | 182 | def make_input_nodes(self): 183 | ''' 184 | Create data input nodes. 185 | 186 | This method is for old-style inputs, where the input specification 187 | was not treated as a first-class layer in the prototext. 188 | Newer models use the "Input layer" type. 189 | ''' 190 | nodes = [Node(name, NodeKind.Data) for name in self.params.input] 191 | if len(nodes): 192 | input_dim = map(int, self.params.input_dim) 193 | if not input_dim: 194 | if len(self.params.input_shape) > 0: 195 | input_dim = map(int, self.params.input_shape[0].dim) 196 | else: 197 | raise KaffeError('Dimensions for input not specified.') 198 | for node in nodes: 199 | node.output_shape = tuple(input_dim) 200 | return nodes 201 | 202 | def build(self): 203 | ''' 204 | Builds the graph from the Caffe layer definitions. 205 | ''' 206 | # Get the layers 207 | layers = self.params.layers or self.params.layer 208 | # Filter out phase-excluded layers 209 | layers = self.filter_layers(layers) 210 | # Get any separately-specified input layers 211 | nodes = self.make_input_nodes() 212 | nodes += [self.make_node(layer) for layer in layers] 213 | # Initialize the graph 214 | graph = Graph(nodes=nodes, name=self.params.name) 215 | # Connect the nodes 216 | # 217 | # A note on layers and outputs: 218 | # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs 219 | # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom 220 | # (in case of in-place operations). Note that the layer's name is not used for establishing 221 | # any connectivity. It's only used for data association. By convention, a layer with a 222 | # single top will often use the same name (although this is not required). 
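        # For example, an in-place ReLU typically lists the same blob as both its
        # bottom and its top ('conv1' in, 'conv1' out), while its layer name is
        # something different such as 'relu1'.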
223 | # 224 | # The current implementation only supports single-output nodes (note that a node can still 225 | # have multiple children, since multiple child nodes can refer to the single top's name). 226 | node_outputs = {} 227 | for layer in layers: 228 | node = graph.get_node(layer.name) 229 | for input_name in layer.bottom: 230 | assert input_name != layer.name 231 | parent_node = node_outputs.get(input_name) 232 | if (parent_node is None) or (parent_node == node): 233 | parent_node = graph.get_node(input_name) 234 | node.add_parent(parent_node) 235 | if len(layer.top)>1: 236 | raise KaffeError('Multiple top nodes are not supported.') 237 | for output_name in layer.top: 238 | if output_name == layer.name: 239 | # Output is named the same as the node. No further action required. 240 | continue 241 | # There are two possibilities here: 242 | # 243 | # Case 1: output_name refers to another node in the graph. 244 | # This is an "in-place operation" that overwrites an existing node. 245 | # This would create a cycle in the graph. We'll undo the in-placing 246 | # by substituting this node wherever the overwritten node is referenced. 247 | # 248 | # Case 2: output_name violates the convention layer.name == output_name. 249 | # Since we are working in the single-output regime, we will can rename it to 250 | # match the layer name. 251 | # 252 | # For both cases, future references to this top re-routes to this node. 253 | node_outputs[output_name] = node 254 | 255 | graph.compute_output_shapes() 256 | return graph 257 | 258 | 259 | class NodeMapper(NodeDispatch): 260 | 261 | def __init__(self, graph): 262 | self.graph = graph 263 | 264 | def map(self): 265 | nodes = self.graph.topologically_sorted() 266 | # Remove input nodes - we'll handle them separately. 267 | input_nodes = self.graph.get_input_nodes() 268 | nodes = [t for t in nodes if t not in input_nodes] 269 | # Decompose DAG into chains. 270 | chains = [] 271 | for node in nodes: 272 | attach_to_chain = None 273 | if len(node.parents) == 1: 274 | parent = node.get_only_parent() 275 | for chain in chains: 276 | if chain[-1] == parent: 277 | # Node is part of an existing chain. 278 | attach_to_chain = chain 279 | break 280 | if attach_to_chain is None: 281 | # Start a new chain for this node. 282 | attach_to_chain = [] 283 | chains.append(attach_to_chain) 284 | attach_to_chain.append(node) 285 | # Map each chain. 286 | mapped_chains = [] 287 | for chain in chains: 288 | mapped_chains.append(self.map_chain(chain)) 289 | return self.commit(mapped_chains) 290 | 291 | def map_chain(self, chain): 292 | return [self.map_node(node) for node in chain] 293 | 294 | def map_node(self, node): 295 | map_func = self.get_handler(node.kind, 'map') 296 | mapped_node = map_func(node) 297 | assert mapped_node is not None 298 | mapped_node.node = node 299 | return mapped_node 300 | 301 | def commit(self, mapped_chains): 302 | raise NotImplementedError('Must be implemented by subclass.') 303 | -------------------------------------------------------------------------------- /parsing_network/train.py: -------------------------------------------------------------------------------- 1 | """Training script for the DeepLab-ResNet network on the PASCAL VOC dataset 2 | for semantic image segmentation. 3 | 4 | This script trains the model using augmented PASCAL VOC, 5 | which contains approximately 10000 images for training and 1500 images for validation. 
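Example invocation (paths below are placeholders):
    python train.py --data-dir /path/to/VOCdevkit --data-list ./dataset/pascal_train.txt \
        --num-classes 7 --random-mirror --random-scale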
6 | """ 7 | 8 | from __future__ import print_function 9 | 10 | import argparse 11 | from datetime import datetime 12 | import os 13 | import sys 14 | import time 15 | 16 | import tensorflow as tf 17 | import numpy as np 18 | 19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, inv_preprocess, prepare_label 20 | 21 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 22 | 23 | BATCH_SIZE = 8 24 | DATA_DIRECTORY = '/home/VOCdevkit' 25 | DATA_LIST_PATH = './dataset/pascal_train.txt' 26 | IGNORE_LABEL = 255 27 | INPUT_SIZE = '321,321' 28 | LEARNING_RATE = 2.5e-4 29 | MOMENTUM = 0.9 30 | NUM_CLASSES = 7 31 | NUM_EPOCHS = 90 32 | POWER = 0.9 33 | RANDOM_SEED = 1435 34 | RESTORE_FROM = './deeplab_resnet.ckpt' 35 | SAVE_NUM_IMAGES = 2 36 | SAVE_PRED_EVERY = 10000 37 | SNAPSHOT_DIR = './snapshots/' 38 | WEIGHT_DECAY = 0.0005 39 | 40 | 41 | def get_arguments(): 42 | """Parse all the arguments provided from the CLI. 43 | 44 | Returns: 45 | A list of parsed arguments. 46 | """ 47 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network") 48 | parser.add_argument("--batch-size", type=int, default=BATCH_SIZE, 49 | help="Number of images sent to the network in one step.") 50 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY, 51 | help="Path to the directory containing the PASCAL VOC dataset.") 52 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH, 53 | help="Path to the file listing the images in the dataset.") 54 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL, 55 | help="The index of the label to ignore during the training.") 56 | parser.add_argument("--input-size", type=str, default=INPUT_SIZE, 57 | help="Comma-separated string with height and width of images.") 58 | parser.add_argument("--is-training", action="store_true", 59 | help="Whether to updates the running means and variances during the training.") 60 | parser.add_argument("--learning-rate", type=float, default=LEARNING_RATE, 61 | help="Base learning rate for training with polynomial decay.") 62 | parser.add_argument("--momentum", type=float, default=MOMENTUM, 63 | help="Momentum component of the optimiser.") 64 | parser.add_argument("--not-restore-last", action="store_true", 65 | help="Whether to not restore last (FC) layers.") 66 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES, 67 | help="Number of classes to predict (including background).") 68 | parser.add_argument("--num-epochs", type=int, default=NUM_EPOCHS, 69 | help="Number of training epochs.") 70 | parser.add_argument("--power", type=float, default=POWER, 71 | help="Decay parameter to compute the learning rate.") 72 | parser.add_argument("--random-mirror", action="store_true", 73 | help="Whether to randomly mirror the inputs during the training.") 74 | parser.add_argument("--random-scale", action="store_true", 75 | help="Whether to randomly scale the inputs during the training.") 76 | parser.add_argument("--random-seed", type=int, default=RANDOM_SEED, 77 | help="Random seed to have reproducible results.") 78 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM, 79 | help="Where restore model parameters from.") 80 | parser.add_argument("--save-num-images", type=int, default=SAVE_NUM_IMAGES, 81 | help="How many images to save.") 82 | parser.add_argument("--save-pred-every", type=int, default=SAVE_PRED_EVERY, 83 | help="Save summaries and checkpoint every often.") 84 | parser.add_argument("--snapshot-dir", type=str, 
default=SNAPSHOT_DIR, 85 | help="Where to save snapshots of the model.") 86 | parser.add_argument("--weight-decay", type=float, default=WEIGHT_DECAY, 87 | help="Regularisation parameter for L2-loss.") 88 | return parser.parse_args() 89 | 90 | def save(saver, sess, logdir, step): 91 | '''Save weights. 92 | 93 | Args: 94 | saver: TensorFlow Saver object. 95 | sess: TensorFlow session. 96 | logdir: path to the snapshots directory. 97 | step: current training step. 98 | ''' 99 | model_name = 'model.ckpt' 100 | checkpoint_path = os.path.join(logdir, model_name) 101 | 102 | if not os.path.exists(logdir): 103 | os.makedirs(logdir) 104 | saver.save(sess, checkpoint_path, global_step=step) 105 | print('The checkpoint has been created.') 106 | 107 | def load(saver, sess, ckpt_path): 108 | '''Load trained weights. 109 | 110 | Args: 111 | saver: TensorFlow Saver object. 112 | sess: TensorFlow session. 113 | ckpt_path: path to checkpoint file with parameters. 114 | ''' 115 | saver.restore(sess, ckpt_path) 116 | print("Restored model parameters from {}".format(ckpt_path)) 117 | 118 | def file_len(fname): 119 | with open(fname) as f: 120 | for i, l in enumerate(f): 121 | pass 122 | return i + 1 123 | 124 | def main(): 125 | """Create the model and start the training.""" 126 | args = get_arguments() 127 | 128 | h, w = map(int, args.input_size.split(',')) 129 | input_size = (h, w) 130 | num_steps = int(file_len(args.data_list) * args.num_epochs / args.batch_size) 131 | print('Total number of steps is '+str(num_steps)) 132 | tf.set_random_seed(args.random_seed) 133 | 134 | # Create queue coordinator. 135 | coord = tf.train.Coordinator() 136 | 137 | # Load reader. 138 | with tf.name_scope("create_inputs"): 139 | reader = ImageReader( 140 | args.data_dir, 141 | args.data_list, 142 | input_size, 143 | args.random_scale, 144 | args.random_mirror, 145 | args.ignore_label, 146 | IMG_MEAN, 147 | coord) 148 | image_batch, label_batch = reader.dequeue(args.batch_size) 149 | print(args.random_scale, 150 | args.random_mirror) 151 | # Create network. 152 | net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes) 153 | # For a small batch size, it is better to keep 154 | # the statistics of the BN layers (running means and variances) 155 | # frozen, and to not update the values provided by the pre-trained model. 156 | # If is_training=True, the statistics will be updated during the training. 157 | # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) 158 | # if they are presented in var_list of the optimiser definition. 159 | 160 | # Predictions. 161 | raw_output = net.layers['fc1_voc12'] 162 | # Which variables to load. Running means and variances are not trainable, 163 | # thus all_variables() should be restored. 
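    # The variable groups built just below drive the per-group learning rates used
    # later on: conv_trainable is trained at the base rate (lr * 1.0), fc_w_trainable
    # (classifier weights) at lr * 10.0 and fc_b_trainable (classifier biases) at
    # lr * 20.0, while BN gamma/beta are left out of all_trainable entirely.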
164 | restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last] 165 | all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name] 166 | fc_trainable = [v for v in all_trainable if 'fc' in v.name] 167 | conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 168 | fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0 169 | fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0 170 | assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable)) 171 | assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)) 172 | 173 | 174 | # Predictions: ignoring all predictions with labels greater or equal than n_classes 175 | raw_prediction = tf.reshape(raw_output, [-1, args.num_classes]) 176 | label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w] 177 | raw_gt = tf.reshape(label_proc, [-1,]) 178 | indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1) 179 | gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) 180 | prediction = tf.gather(raw_prediction, indices) 181 | 182 | 183 | # Pixel-wise softmax loss. 184 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt) 185 | l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name] 186 | reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses) 187 | 188 | # Processed predictions: for visualisation. 189 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) 190 | raw_output_up = tf.argmax(raw_output_up, dimension=3) 191 | pred = tf.expand_dims(raw_output_up, dim=3) 192 | 193 | # Image summary. 194 | images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8) 195 | labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8) 196 | preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8) 197 | 198 | if not os.path.exists(args.snapshot_dir): 199 | os.mkdir(args.snapshot_dir) 200 | 201 | total_summary = tf.summary.image('images', 202 | tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 203 | max_outputs=args.save_num_images) # Concatenate row-wise. 204 | summary_writer = tf.summary.FileWriter(args.snapshot_dir, 205 | graph=tf.get_default_graph()) 206 | 207 | # Define loss and optimisation parameters. 
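    # Polynomial ("poly") decay, as implemented just below:
    #   lr(step) = base_lr * (1 - step / num_steps) ** power
    # with the current step fed in through step_ph in the training loop.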
208 | base_lr = tf.constant(args.learning_rate) 209 | step_ph = tf.placeholder(dtype=tf.float32, shape=()) 210 | learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / num_steps), args.power)) 211 | 212 | opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum) 213 | opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum) 214 | opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum) 215 | 216 | grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable) 217 | grads_conv = grads[:len(conv_trainable)] 218 | grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))] 219 | grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):] 220 | 221 | train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable)) 222 | train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable)) 223 | train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable)) 224 | 225 | train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b) 226 | 227 | 228 | # Set up tf session and initialize variables. 229 | config = tf.ConfigProto() 230 | config.gpu_options.allow_growth = True 231 | sess = tf.Session(config=config) 232 | init = tf.global_variables_initializer() 233 | 234 | sess.run(init) 235 | 236 | # Saver for storing checkpoints of the model. 237 | saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10) 238 | 239 | # Load variables if the checkpoint is provided. 240 | if args.restore_from is not None: 241 | loader = tf.train.Saver(var_list=restore_var) 242 | load(loader, sess, args.restore_from) 243 | 244 | # Start queue threads. 245 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 246 | 247 | # Iterate over training steps. 248 | for step in range(num_steps): 249 | start_time = time.time() 250 | feed_dict = { step_ph : step } 251 | 252 | if step % args.save_pred_every == 0 or step == num_steps-1: 253 | loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, train_op], feed_dict=feed_dict) 254 | summary_writer.add_summary(summary, step) 255 | save(saver, sess, args.snapshot_dir, step) 256 | else: 257 | loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict) 258 | duration = time.time() - start_time 259 | print('step {:d} \t loss = {:.3f} , ({:.3f} sec/step)'.format(step, loss_value, duration)) 260 | coord.request_stop() 261 | coord.join(threads) 262 | 263 | if __name__ == '__main__': 264 | main() 265 | -------------------------------------------------------------------------------- /data_generation/generate_prior_util.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Variables and functions for prior generation. 3 | The order of keypoints is: 4 | 0-'right ankle' 1-'right knee' 2-'right hip' 3-'left hip' 4-'left knee' 5-'left ankle' 6-'pelvis' 7-'thorax' 8-'neck' 5 | 9-'head' 10-'right wrist' 11-'right elbow' 12-'right shoulder' 13-'left shoulder' 14-'left elbow' 15-'left wrist'. 6 | When pelvis is missing, we use the midpoint of two hips instead. 7 | Thorax is unused and set to be the same(0, 0). 8 | ''' 9 | 10 | import numpy as np 11 | import os 12 | import copy 13 | import cv2 14 | import random 15 | 16 | 17 | ''' 18 | Used for aligning torso. 
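Each list below walks a limb outwards from the torso in the keypoint order
documented at the top of this file, e.g. RIGHT_LEG = [6, 2, 1, 0] is
pelvis -> right hip -> right knee -> right ankle.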
19 | ''' 20 | RIGHT_LEG = [6, 2, 1, 0] 21 | LEFT_LEG = [6, 3, 4, 5] 22 | RIGHT_ARM = [6, 8, 12, 11, 10] 23 | LEFT_ARM = [6, 8, 13, 14, 15] 24 | HEAD = [6, 8, 9] 25 | SPINE = [6, 8] 26 | RIGHT_ARM_SPINE = [8, 12, 11, 10] 27 | LEFT_ARM_SPINE = [8, 13, 14, 15] 28 | HEAD_SPINE = [8, 9] 29 | 30 | ''' 31 | Colors for each part of body. 32 | ''' 33 | body_part_color = np.array([ 34 | [0, 0, 0], 35 | [128, 0, 0], 36 | [0, 128, 0], 37 | [128, 128, 0], 38 | [0, 0, 128], 39 | [128, 0, 128], 40 | [0, 128, 128], 41 | [128, 128, 128], 42 | [64, 0, 0], 43 | [192, 0, 0], 44 | [64, 128, 0]], 45 | dtype=np.uint8) 46 | 47 | ''' 48 | Merge 1 - 10 labels to 1 - 6 labels, 49 | e.g. left && right upper arm ==> upper arm. 50 | ''' 51 | merge_mask = np.array([0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6], dtype=np.uint8) 52 | 53 | ''' 54 | Morphing is done for each body part associated with following skeleton lines and then merged together. 55 | ''' 56 | main_skeleton_lines = [ 57 | [], 58 | [8, 9], 59 | [6, 8], 60 | [13, 14], 61 | [12, 11], 62 | [14, 15], 63 | [11, 10], 64 | [3, 4], 65 | [2, 1], 66 | [4, 5], 67 | [1, 0]] 68 | 69 | ''' 70 | Lines of skeleton. 71 | ''' 72 | skeletonLines = [ 73 | [0, 1], 74 | [1, 2], 75 | [2, 6], 76 | [6, 3], 77 | [3, 4], 78 | [4, 5], 79 | [6, 8], 80 | [8, 9], 81 | [8, 12], 82 | [12, 11], 83 | [11, 10], 84 | [8, 13], 85 | [13, 14], 86 | [14, 15] 87 | ] 88 | 89 | ''' 90 | Dictionary of (color : RGB) pair. 91 | ''' 92 | colorDict = { 93 | "purple": [255, 0, 128], 94 | "orange": [1, 96, 254], 95 | "light_blue": [255, 141, 28], 96 | "dark_blue": [232, 0, 0], 97 | "red": [0, 0, 255] 98 | } 99 | 100 | ''' 101 | Colors for each skeleton line. 102 | ''' 103 | skeletonColor = ["orange", "orange", "orange", "light_blue", "light_blue", "light_blue", 104 | "purple", "purple", "red", "red", "red", "dark_blue", "dark_blue", "dark_blue" 105 | ] 106 | 107 | 108 | def drawSkeleton(img, pose): 109 | ''' 110 | Given an image and a pose, draw the skeleton on that image. 111 | 112 | :param img: 113 | Image to draw skeleton on. 114 | :param pose: 115 | Pose of shape (1 x 32). 116 | :return: 117 | Image with skeleton lines. 118 | ''' 119 | retImg = copy.deepcopy(img) 120 | pose = pose[0] 121 | for i in range(len(skeletonLines)): 122 | a = skeletonLines[i][0] 123 | b = skeletonLines[i][1] 124 | cv2.line(retImg, (int(pose[a*2]), int(pose[a*2+1])), (int(pose[b*2]), int(pose[b*2+1])), list(map(lambda i: i*0.6, colorDict[skeletonColor[i]])), 3) 125 | return retImg 126 | 127 | 128 | def align_torso(poses): 129 | ''' 130 | Align torso length to the same(50). 131 | 132 | :param poses: 133 | 2-dimension array. The shape should be N x 32. 134 | :return: 135 | Aligned pose array. 
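    A shape-only sketch (illustrative values, not from the original docs):
        poses = np.random.rand(4, 32) * 100   # 4 poses, 16 (x, y) pairs each
        aligned = align_torso(poses)          # still 4 x 32, torso length scaled to 50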
136 | ''' 137 | poses_new = copy.deepcopy(poses) 138 | for i in range(1, 4): 139 | poses_new[:, (2 * RIGHT_LEG[i]):(2 * RIGHT_LEG[i] + 2)] = poses[:, (2 * RIGHT_LEG[i]):(2 * RIGHT_LEG[i] + 2)] - poses[:, (2 * RIGHT_LEG[i - 1]):(2 * RIGHT_LEG[i - 1] + 2)] 140 | poses_new[:, 2 * LEFT_LEG[i]:2 * LEFT_LEG[i] + 2] = poses[:, 2 * LEFT_LEG[i]:2 * LEFT_LEG[i] + 2] - poses[:, 2 * LEFT_LEG[i - 1]:2 * LEFT_LEG[i - 1] + 2] 141 | for i in range(1, 5): 142 | poses_new[:, 2 * RIGHT_ARM[i]:2 * RIGHT_ARM[i] + 2] = poses[:, 2 * RIGHT_ARM[i]:2 * RIGHT_ARM[i] + 2] - poses[:, 2 * RIGHT_ARM[i - 1]:2 * RIGHT_ARM[i - 1] + 2] 143 | poses_new[:, 2 * LEFT_ARM[i]:2 * LEFT_ARM[i] + 2] = poses[:, 2 * LEFT_ARM[i]:2 * LEFT_ARM[i] + 2] - poses[:, 2 * LEFT_ARM[i - 1]:2 * LEFT_ARM[i - 1] + 2] 144 | for i in range(1, 3): 145 | poses_new[:, 2 * HEAD[i]:2 * HEAD[i] + 2] = poses[:, 2 * HEAD[i]:2 * HEAD[i] + 2] - poses[:, 2 * HEAD[i - 1]:2 * HEAD[i - 1] + 2] 146 | 147 | ratio = 50 / np.sqrt(np.square(poses_new[:, 16:17]) + np.square(poses_new[:, 17:18])) 148 | poses_ret = poses_new * np.tile(ratio[:, 0:1], [1, 32]) 149 | 150 | for i in range(1, 4): 151 | poses_ret[:, (2 * RIGHT_LEG[i]):(2 * RIGHT_LEG[i] + 2)] = poses_ret[:, (2 * RIGHT_LEG[i]):(2 * RIGHT_LEG[i] + 2)] + poses_ret[:, (2 * RIGHT_LEG[i - 1]):(2 * RIGHT_LEG[i - 1] + 2)] 152 | poses_ret[:, 2 * LEFT_LEG[i]:2 * LEFT_LEG[i] + 2] = poses_ret[:, 2 * LEFT_LEG[i]:2 * LEFT_LEG[i] + 2] + poses_ret[:, 2 * LEFT_LEG[i - 1]:2 * LEFT_LEG[i - 1] + 2] 153 | for i in range(1, 2): 154 | poses_ret[:, (2 * SPINE[i]):(2 * SPINE[i] + 2)] = poses_ret[:, (2 * SPINE[i]):(2 * SPINE[i] + 2)] + poses_ret[:, (2 * SPINE[i - 1]):(2 * SPINE[i - 1] + 2)] 155 | for i in range(1, 4): 156 | poses_ret[:, 2 * RIGHT_ARM_SPINE[i]:2 * RIGHT_ARM_SPINE[i] + 2] = poses_ret[:, 2 * RIGHT_ARM_SPINE[i]:2 * RIGHT_ARM_SPINE[i] + 2] + poses_ret[:, 2 * RIGHT_ARM_SPINE[i - 1]:2 * RIGHT_ARM_SPINE[i - 1] + 2] 157 | poses_ret[:, 2 * LEFT_ARM_SPINE[i]:2 * LEFT_ARM_SPINE[i] + 2] = poses_ret[:, 2 * LEFT_ARM_SPINE[i]:2 * LEFT_ARM_SPINE[i] + 2] + poses_ret[:, 2 * LEFT_ARM_SPINE[i - 1]:2 * LEFT_ARM_SPINE[i - 1] + 2] 158 | for i in range(1, 2): 159 | poses_ret[:, 2 * HEAD_SPINE[i]:2 * HEAD_SPINE[i] + 2] = poses_ret[:, 2 * HEAD_SPINE[i]:2 * HEAD_SPINE[i] + 2] + poses_ret[:, 2 * HEAD_SPINE[i - 1]:2 * HEAD_SPINE[i - 1] + 2] 160 | 161 | center = np.array((250, 250), dtype=float) 162 | centers = np.tile(center, (poses.shape[0], 1)) 163 | centers = centers - poses_ret[:, 12:14] 164 | centers = np.tile(centers, (1, 16)) 165 | poses_ret = poses_ret + centers 166 | 167 | return poses_ret 168 | 169 | 170 | def load_pascal_pose(pascal_pose_file_root): 171 | ''' 172 | Load preprocessed Pascal pose file. 173 | 174 | :param pascal_pose_file_root: 175 | Root of pascal pose file. 176 | :return: 177 | pose_arr: 2-dimension array of aligned pose of shape N x 32, thorax set to be (0, 0). 178 | img_names: list of image names. 179 | pose_dict: dictionary of (image name : unaligned pose[1 x 32]) pairs. 
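    The pose file is expected to be comma-separated, one person per line
    (inferred from the parsing code below):
        <image_name>,<x0>,<y0>,<vis0>,...,<x15>,<y15>,<vis15>
    Lines containing a negative coordinate are skipped, and the pelvis (index 6)
    is recomputed as the midpoint of the two hips.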
180 | ''' 181 | pascal_pose_file = open(pascal_pose_file_root, "r") 182 | 183 | pose_list = [] 184 | img_names = [] 185 | pose_dict = {} 186 | 187 | line_count = 0 188 | while True: 189 | line = pascal_pose_file.readline() 190 | if not line: 191 | break 192 | words = line.split(",") 193 | img_name = words[0] 194 | pose_tmp = np.zeros((1, 32), dtype=float) 195 | for i in range(16): 196 | x = words[1 + 3 * i] 197 | y = words[1 + 3 * i + 1] 198 | # words[1 + 3 * i + 2] is_visible (not used) 199 | 200 | pose_tmp[0][2 * i] = float(x) 201 | pose_tmp[0][2 * i + 1] = float(y) 202 | 203 | has_negative = False 204 | for i in range(16): 205 | if pose_tmp[0][2 * i] < 0 or pose_tmp[0][2 * i + 1] < 0: 206 | has_negative = True 207 | break 208 | if has_negative: 209 | continue 210 | pose_tmp[0][12] = (pose_tmp[0][4] + pose_tmp[0][6]) / 2 211 | pose_tmp[0][13] = (pose_tmp[0][5] + pose_tmp[0][7]) / 2 212 | pose_list.append(pose_tmp) 213 | img_names.append(img_name[0:len(img_name) - 4]) 214 | pose_dict[img_name[0:len(img_name) - 4]] = pose_tmp 215 | 216 | line_count += 1 217 | 218 | pascal_pose_file.close() 219 | 220 | pose_arr = np.zeros((len(pose_list), 32), dtype=float) 221 | for i in range(len(pose_list)): 222 | pose_arr[i] = pose_list[i] 223 | 224 | pose_arr = align_torso(pose_arr) 225 | pose_arr[:, 14:16] = np.tile(np.zeros((1, 2), dtype=float), (len(pose_list), 1)) 226 | return pose_arr, img_names, pose_dict 227 | 228 | 229 | def paint(mask_img, merge): 230 | ''' 231 | Paint parsing result(mask) to color image. 232 | 233 | :param mask_img: 234 | 1-channel parsing result. 235 | :param merge: 236 | 0 or 1. If 1, merge 10 parts into 6 parts. 237 | :return: 238 | Corresponding color image. 239 | ''' 240 | assert (len(mask_img.shape) == 2) 241 | if merge: 242 | return body_part_color[merge_mask[mask_img]] 243 | else: 244 | return body_part_color[mask_img] 245 | 246 | 247 | def morphing(origin_mask_img, origin_pose, target_pose, target_size): # target_size [width, height] 248 | ''' 249 | According to origin pose and target pose, morph the origin mask image so as to get the same pose as the target pose. 250 | 251 | :param origin_mask_img: 252 | Origin mask image, 1-channel, of labels 0-10 (0 for backgraound). 253 | :param origin_pose: 254 | 1-dimension pose array, of shape (32, ). 255 | :param target_pose: 256 | 1-dimension pose array, of shape (32, ). 257 | :param target_size: 258 | Target image size: [width, height]. 259 | :return: 260 | Color image of morphed mask image, of size target_size. 
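    Per-part procedure (as implemented below): scale the part mask by the ratio
    of target to origin bone length, translate the bone centre to the image
    centre, rotate by the angle between the two bones, then paste the part into
    the target canvas with np.maximum so parts never erase one another.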
261 | ''' 262 | assert (len(origin_mask_img.shape) == 2) 263 | assert (len(origin_pose.shape) == 1) 264 | assert (len(target_pose.shape) == 1) 265 | 266 | target_mask_img = np.zeros((target_size[1], target_size[0]), dtype=np.uint8) 267 | # morphing for each part 268 | for label in range(1, 11): 269 | origin_size = np.array([origin_mask_img.shape[1], origin_mask_img.shape[0]], dtype=int) 270 | origin_body_part = origin_mask_img * (origin_mask_img == label) 271 | a = main_skeleton_lines[label][0] 272 | b = main_skeleton_lines[label][1] 273 | origin_pose_part_a = np.array([origin_pose[a * 2], origin_pose[a * 2 + 1]], dtype=float) 274 | origin_pose_part_b = np.array([origin_pose[b * 2], origin_pose[b * 2 + 1]], dtype=float) 275 | origin_pose_part_tensor = origin_pose_part_b - origin_pose_part_a 276 | target_pose_part_a = np.array([target_pose[a * 2], target_pose[a * 2 + 1]], dtype=float) 277 | target_pose_part_b = np.array([target_pose[b * 2], target_pose[b * 2 + 1]], dtype=float) 278 | target_pose_part_tensor = target_pose_part_b - target_pose_part_a 279 | origin_pose_part_length = np.sqrt(np.sum(np.square(origin_pose_part_tensor))) 280 | target_pose_part_length = np.sqrt(np.sum(np.square(target_pose_part_tensor))) 281 | # scaling ratio 282 | scale_factor = target_pose_part_length / origin_pose_part_length 283 | if scale_factor == 0: 284 | continue 285 | # rotating angle 286 | theta = - (np.arctan2(target_pose_part_tensor[1], target_pose_part_tensor[0]) - np.arctan2( 287 | origin_pose_part_tensor[1], origin_pose_part_tensor[0])) * 180 / np.pi 288 | 289 | ''' scale ''' 290 | origin_size[0] *= scale_factor 291 | origin_size[1] *= scale_factor 292 | origin_pose_part_a *= scale_factor 293 | origin_pose_part_b *= scale_factor 294 | origin_body_part = cv2.resize(origin_body_part, (origin_size[0], origin_size[1]), 295 | interpolation=cv2.INTER_NEAREST) 296 | # print("finish scale", label) 297 | 298 | ''' translate to the center in case rotation out of the image ''' 299 | origin_pose_part_center = (origin_pose_part_a + origin_pose_part_b) / 2 300 | origin_center = origin_size / 2 301 | tx = origin_center[0] - int(origin_pose_part_center[0]) 302 | ty = origin_center[1] - int(origin_pose_part_center[1]) 303 | tm = np.float32([[1, 0, tx], [0, 1, ty]]) 304 | origin_body_part = cv2.warpAffine(origin_body_part, tm, (origin_size[0], origin_size[1])) 305 | # print("finish translate", label) 306 | 307 | ''' rotate ''' 308 | rm = cv2.getRotationMatrix2D((origin_center[0], origin_center[1]), theta, 1) 309 | origin_body_part = cv2.warpAffine(origin_body_part, rm, (origin_size[0], origin_size[1])) 310 | origin_body_part = (origin_body_part != 0) * label 311 | # print("finish rotate", label) 312 | 313 | ''' crop and paste ''' 314 | target_pose_part_center = (target_pose_part_a + target_pose_part_b) / 2 315 | target_pose_part_center[0] = int(target_pose_part_center[0]) 316 | target_pose_part_center[1] = int(target_pose_part_center[1]) 317 | if target_pose_part_center[1] >= origin_center[1]: 318 | origin_row_low = 0 319 | target_row_low = target_pose_part_center[1] - origin_center[1] 320 | else: 321 | origin_row_low = origin_center[1] - target_pose_part_center[1] 322 | target_row_low = 0 323 | if (target_size[1] - target_pose_part_center[1]) >= (origin_size[1] - origin_center[1]): 324 | origin_row_high = origin_size[1] 325 | target_row_high = target_pose_part_center[1] + origin_size[1] - origin_center[1] 326 | else: 327 | origin_row_high = origin_center[1] + target_size[1] - target_pose_part_center[1] 328 | 
target_row_high = target_size[1] 329 | if target_pose_part_center[0] >= origin_center[0]: 330 | origin_col_low = 0 331 | target_col_low = target_pose_part_center[0] - origin_center[0] 332 | else: 333 | origin_col_low = origin_center[0] - target_pose_part_center[0] 334 | target_col_low = 0 335 | if (target_size[0] - target_pose_part_center[0]) >= (origin_size[0] - origin_center[0]): 336 | origin_col_high = origin_size[0] 337 | target_col_high = target_pose_part_center[0] + origin_size[0] - origin_center[0] 338 | else: 339 | origin_col_high = origin_center[0] + target_size[0] - target_pose_part_center[0] 340 | target_col_high = target_size[0] 341 | origin_row_low = int(origin_row_low) 342 | target_row_low = int(target_row_low) 343 | origin_row_high = int(origin_row_high) 344 | target_row_high = int(target_row_high) 345 | origin_col_low = int(origin_col_low) 346 | target_col_low = int(target_col_low) 347 | origin_col_high = int(origin_col_high) 348 | target_col_high = int(target_col_high) 349 | target_mask_img[target_row_low:target_row_high, target_col_low:target_col_high] = np.maximum( 350 | target_mask_img[target_row_low:target_row_high, target_col_low:target_col_high], 351 | origin_body_part[origin_row_low:origin_row_high, origin_col_low:origin_col_high]) 352 | # print("finish crop and paste", label) 353 | 354 | return paint(target_mask_img, merge=True) 355 | 356 | 357 | def random_k_within_n(n, k): 358 | ''' 359 | Get random k number in [0, n). 360 | 361 | :param n: 362 | :param k: 363 | :return: 364 | ''' 365 | arr = np.arange(n) 366 | random.shuffle(arr) 367 | return arr[0:k] 368 | 369 | 370 | def find_nearest_pose(aligned_pose, pascal_poses): 371 | ''' 372 | Given a aligned pose, order the aligned pascal pose array and return the index array. 373 | 374 | :param aligned_pose: 375 | Aligned pose(throax set to (0, 0)), 1-d(32, ) or 2-d(1, 32). 376 | :param pascal_poses: 377 | Aligned pascal poses, of shape(N x 32). 378 | :return: 379 | Ordered index array. 380 | ''' 381 | pose_tmp = np.tile(aligned_pose, (pascal_poses.shape[0], 1)) 382 | pose_pascal_distance = np.sum(np.square(pose_tmp - pascal_poses), axis=1) 383 | distance_index = np.argsort(pose_pascal_distance) 384 | return distance_index 385 | 386 | 387 | def generate_prior_single_person(bbox, raw_pose, PASCALMaskImgDir, pascal_poses, pascal_img_names, pascal_pose_dict, n, k, exclude_self=False, save_dir=None): 388 | ''' 389 | Generate prior for a single person. 390 | 391 | :param bbox: 392 | Bounding box of the person(can be image size). 393 | The pose will be aligned according to the upper left coordinate of bbox. 394 | The generated prior image size is the same as bbox. 395 | The lower right point is inclusive. 396 | :param raw_pose: 397 | Unaligned pose of shape (1, 32). 398 | :param save_dir: 399 | Path to save n nearest priors. If is None, do not save. 400 | :param PASCALMaskImgDir: 401 | Path to pascal mask images. 402 | :param pascal_poses: 403 | 2-d array of aligned pascal poses. 404 | :param pascal_img_names: 405 | List of pascal image names. 406 | :param pascal_pose_dict: 407 | Dictionary of (image name : unaligned pose) pairs. 408 | :param n: 409 | Pick n nearest poses. 410 | :param k: 411 | Average nearest k priors. 412 | :param exclude_self: 413 | If 1, when we are finding nearest pose for pascal image in pascal images, the nearset one must be itself, so that the first one should be excluded. 414 | If 0, the first one not excluded. 415 | :return: 416 | The averaged prior. 
At the same time, n priors are saved in save_dir. 417 | ''' 418 | # raw_pose: [[x1,y1,x2,y2,...,x16,y16]] 419 | aligned_pose = align_torso(raw_pose) 420 | aligned_pose[:, 14:16] = np.tile(np.zeros((1, 2), dtype=float), (1, 1)) 421 | distance_index = find_nearest_pose(aligned_pose, pascal_poses) 422 | if exclude_self: 423 | distance_index = distance_index[1:] 424 | 425 | # bbox: [x1, y1, x2, y2] 426 | width = bbox[2] - bbox[0] + 1 427 | heigth = bbox[3] - bbox[1] + 1 428 | pose = copy.deepcopy(raw_pose) 429 | pose -= np.tile(np.array([bbox[0], bbox[1]]), (1, 16)) 430 | pose = pose[0] 431 | origin_size = np.array([int(width), int(heigth)], dtype=int) 432 | average_parsing = np.zeros((heigth, width, 3), dtype=float) 433 | 434 | close_pascal_index = distance_index[random_k_within_n(n, k)] 435 | for j in range(n): 436 | if distance_index[j] not in close_pascal_index and save_dir is None: 437 | continue 438 | pascal_name = pascal_img_names[distance_index[j]] 439 | # get PASCAL mask img and morph 440 | print(j, 'picked pascal', pascal_name) 441 | pascal_mask_img = cv2.imread(os.path.join(PASCALMaskImgDir, pascal_name + ".png"), 0) 442 | # cv2.imwrite(os.path.join(save_dir, 'origin_' + str(j) + '_' + pascal_name + '.png'), paint(pascal_mask_img)) 443 | pascal_pose = pascal_pose_dict[pascal_name][0] 444 | morphingImg = morphing(pascal_mask_img, pascal_pose, pose, origin_size) 445 | if save_dir is not None: 446 | cv2.imwrite(os.path.join(save_dir, str(j) + '.png'), morphingImg[:, :, [2, 1, 0]]) 447 | if distance_index[j] in close_pascal_index: 448 | average_parsing += morphingImg 449 | 450 | average_parsing /= k 451 | return average_parsing 452 | --------------------------------------------------------------------------------
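The helpers above feed generate_prior_single_person. A minimal usage sketch follows (not part of the repository): the pose file, mask directory, bounding box and the n/k values are placeholder assumptions, and raw_pose must be filled with real keypoints in the order documented at the top of generate_prior_util.py.

import cv2
import numpy as np

from generate_prior_util import load_pascal_pose, generate_prior_single_person

# Hypothetical inputs: a preprocessed Pascal pose file and a directory of
# 1-channel part masks (labels 0-10), as described in the docstrings above.
pascal_poses, pascal_img_names, pascal_pose_dict = load_pascal_pose('pascal_pose.csv')

# 1 x 32 array of 16 (x, y) keypoints for one detected person.
# The zeros are placeholders; fill in real coordinates before use.
raw_pose = np.zeros((1, 32), dtype=float)
bbox = [0, 0, 255, 255]                   # [x1, y1, x2, y2], lower-right inclusive

prior = generate_prior_single_person(
    bbox, raw_pose, 'pascal_masks',
    pascal_poses, pascal_img_names, pascal_pose_dict,
    n=10, k=3)                            # average k priors picked at random from the n nearest

# The prior is an RGB float image; flip to BGR and cast before saving with OpenCV,
# mirroring how the function saves its intermediate priors.
cv2.imwrite('prior.png', prior[:, :, [2, 1, 0]].astype(np.uint8))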