├── .gitignore
├── data_generation
│   ├── refinement_network
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   ├── base_data_loader.py
│   │   │   ├── data_loader.py
│   │   │   ├── custom_dataset_data_loader.py
│   │   │   ├── single_dataset.py
│   │   │   ├── base_dataset.py
│   │   │   ├── image_folder.py
│   │   │   └── aligned_dataset.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── models.py
│   │   │   ├── base_model.py
│   │   │   ├── test_model.py
│   │   │   └── pix2pix_model.py
│   │   ├── util
│   │   │   ├── __init__.py
│   │   │   ├── image_pool.py
│   │   │   ├── html.py
│   │   │   ├── util.py
│   │   │   └── visualizer.py
│   │   ├── options
│   │   │   ├── __init__.py
│   │   │   ├── test_options.py
│   │   │   ├── train_options.py
│   │   │   └── base_options.py
│   │   ├── scripts
│   │   │   ├── test.sh
│   │   │   └── train.sh
│   │   ├── test.py
│   │   ├── datasets
│   │   │   └── generate_pascal_training_prior.py
│   │   ├── train.py
│   │   └── README.md
│   ├── examples
│   │   ├── examples_origin.jpg
│   │   ├── examples_overlay.jpg
│   │   ├── origin_images
│   │   │   ├── COCO_train2014_000000036827.jpg
│   │   │   └── COCO_train2014_000000131780.jpg
│   │   └── examples.json
│   ├── pose2label.sh
│   ├── demo.sh
│   ├── overlay.py
│   ├── pick_full_person.py
│   ├── merge_parsing_result.py
│   ├── README.md
│   ├── crop_pose_and_generate_testing_prior.py
│   └── generate_prior_util.py
├── parsing_network
│   ├── dataset
│   │   └── README
│   ├── kaffe
│   │   ├── caffe
│   │   │   ├── __init__.py
│   │   │   ├── __init__.pyc
│   │   │   ├── resolver.pyc
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-34.pyc
│   │   │   │   └── resolver.cpython-34.pyc
│   │   │   └── resolver.py
│   │   ├── graph.pyc
│   │   ├── __init__.pyc
│   │   ├── errors.pyc
│   │   ├── layers.pyc
│   │   ├── shapes.pyc
│   │   ├── tensorflow
│   │   │   ├── __init__.py
│   │   │   ├── network.pyc
│   │   │   ├── __init__.pyc
│   │   │   ├── transformer.pyc
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-34.pyc
│   │   │   │   ├── network.cpython-34.pyc
│   │   │   │   └── transformer.cpython-34.pyc
│   │   │   ├── transformer.py
│   │   │   └── network.py
│   │   ├── transformers.pyc
│   │   ├── __pycache__
│   │   │   ├── graph.cpython-34.pyc
│   │   │   ├── errors.cpython-34.pyc
│   │   │   ├── layers.cpython-34.pyc
│   │   │   ├── shapes.cpython-34.pyc
│   │   │   ├── __init__.cpython-34.pyc
│   │   │   └── transformers.cpython-34.pyc
│   │   ├── __init__.py
│   │   ├── errors.py
│   │   ├── shapes.py
│   │   ├── layers.py
│   │   ├── transformers.py
│   │   └── graph.py
│   ├── output
│   │   └── mj.png
│   ├── misc
│   │   ├── 2007_000129.jpg
│   │   └── 2007_000129.png
│   ├── requirements.txt
│   ├── deeplab_resnet
│   │   ├── model.pyc
│   │   ├── utils.pyc
│   │   ├── __init__.pyc
│   │   ├── image_reader.pyc
│   │   ├── __pycache__
│   │   │   ├── model.cpython-34.pyc
│   │   │   ├── utils.cpython-34.pyc
│   │   │   ├── __init__.cpython-34.pyc
│   │   │   └── image_reader.cpython-34.pyc
│   │   ├── __init__.py
│   │   ├── utils.py
│   │   └── image_reader.py
│   ├── LICENSE
│   ├── README.md
│   ├── real-time-inference.py
│   ├── inference.py
│   ├── evaluate.py
│   └── train.py
├── imgs
│   ├── comparison.gif
│   └── transferred.jpg
├── LICENSE
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/options/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/parsing_network/dataset/README:
--------------------------------------------------------------------------------
1 | Unzip the dataset archive `archive.zip` here.
2 |
--------------------------------------------------------------------------------
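If you prefer to do this from Python instead of a shell, a minimal sketch (assuming the archive really is named `archive.zip` and sits in this `dataset` directory):

```python
import zipfile

# Unpack the dataset archive next to this README, i.e. into parsing_network/dataset/.
with zipfile.ZipFile('archive.zip') as zf:
    zf.extractall('.')
```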
/imgs/comparison.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/imgs/comparison.gif
--------------------------------------------------------------------------------
/imgs/transferred.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/imgs/transferred.jpg
--------------------------------------------------------------------------------
/parsing_network/kaffe/caffe/__init__.py:
--------------------------------------------------------------------------------
1 | from .resolver import get_caffe_resolver, has_pycaffe
2 |
--------------------------------------------------------------------------------
/parsing_network/output/mj.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/output/mj.png
--------------------------------------------------------------------------------
/parsing_network/kaffe/graph.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/graph.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__init__.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/errors.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/errors.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/layers.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/layers.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/shapes.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/shapes.pyc
--------------------------------------------------------------------------------
/parsing_network/misc/2007_000129.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/misc/2007_000129.jpg
--------------------------------------------------------------------------------
/parsing_network/misc/2007_000129.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/misc/2007_000129.png
--------------------------------------------------------------------------------
/parsing_network/requirements.txt:
--------------------------------------------------------------------------------
1 | Cython>=0.19.2
2 | numpy>=1.7.1
3 | matplotlib>=1.3.1
4 | Pillow>=2.3.0
5 | six>=1.1.0
6 |
--------------------------------------------------------------------------------
/parsing_network/kaffe/tensorflow/__init__.py:
--------------------------------------------------------------------------------
1 | from .transformer import TensorFlowTransformer
2 | from .network import Network
3 |
--------------------------------------------------------------------------------
/parsing_network/kaffe/transformers.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/transformers.pyc
--------------------------------------------------------------------------------
/parsing_network/deeplab_resnet/model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/model.pyc
--------------------------------------------------------------------------------
/parsing_network/deeplab_resnet/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/utils.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/caffe/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/caffe/__init__.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/caffe/resolver.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/caffe/resolver.pyc
--------------------------------------------------------------------------------
/data_generation/examples/examples_origin.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/data_generation/examples/examples_origin.jpg
--------------------------------------------------------------------------------
/parsing_network/deeplab_resnet/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__init__.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/tensorflow/network.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/network.pyc
--------------------------------------------------------------------------------
/data_generation/examples/examples_overlay.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/data_generation/examples/examples_overlay.jpg
--------------------------------------------------------------------------------
/parsing_network/kaffe/tensorflow/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/__init__.pyc
--------------------------------------------------------------------------------
/parsing_network/deeplab_resnet/image_reader.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/image_reader.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/tensorflow/transformer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/transformer.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/__pycache__/graph.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/graph.cpython-34.pyc
--------------------------------------------------------------------------------
/data_generation/refinement_network/scripts/test.sh:
--------------------------------------------------------------------------------
1 | python3 test.py --dataroot /path/to/dataset --dataset_mode single --model test --output_nc 1 --name exp1
2 |
--------------------------------------------------------------------------------
/parsing_network/kaffe/__pycache__/errors.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/errors.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/__pycache__/layers.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/layers.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/__pycache__/shapes.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/shapes.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/__init__.py:
--------------------------------------------------------------------------------
1 | from .graph import GraphBuilder, NodeMapper
2 | from .errors import KaffeError, print_stderr
3 |
4 | from . import tensorflow
5 |
--------------------------------------------------------------------------------
/parsing_network/kaffe/__pycache__/__init__.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/__init__.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/errors.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | class KaffeError(Exception):
4 | pass
5 |
6 | def print_stderr(msg):
7 | sys.stderr.write('%s\n' % msg)
8 |
--------------------------------------------------------------------------------
/parsing_network/kaffe/__pycache__/transformers.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/__pycache__/transformers.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/deeplab_resnet/__pycache__/model.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__pycache__/model.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/deeplab_resnet/__pycache__/utils.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__pycache__/utils.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/caffe/__pycache__/__init__.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/caffe/__pycache__/__init__.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/caffe/__pycache__/resolver.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/caffe/__pycache__/resolver.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/deeplab_resnet/__pycache__/__init__.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__pycache__/__init__.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/tensorflow/__pycache__/__init__.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/__pycache__/__init__.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/tensorflow/__pycache__/network.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/__pycache__/network.cpython-34.pyc
--------------------------------------------------------------------------------
/data_generation/examples/origin_images/COCO_train2014_000000036827.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/data_generation/examples/origin_images/COCO_train2014_000000036827.jpg
--------------------------------------------------------------------------------
/data_generation/examples/origin_images/COCO_train2014_000000131780.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/data_generation/examples/origin_images/COCO_train2014_000000131780.jpg
--------------------------------------------------------------------------------
/parsing_network/deeplab_resnet/__pycache__/image_reader.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/deeplab_resnet/__pycache__/image_reader.cpython-34.pyc
--------------------------------------------------------------------------------
/parsing_network/kaffe/tensorflow/__pycache__/transformer.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MVIG-SJTU/WSHP/HEAD/parsing_network/kaffe/tensorflow/__pycache__/transformer.cpython-34.pyc
--------------------------------------------------------------------------------
/data_generation/refinement_network/scripts/train.sh:
--------------------------------------------------------------------------------
1 | python3 train.py --dataroot /path/to/dataset --dataset_mode aligned --model pix2pix --no_gan --shuffle --n 5 --k 3 --output_nc 1 --name exp1
2 |
--------------------------------------------------------------------------------
/parsing_network/deeplab_resnet/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import DeepLabResNetModel
2 | from .image_reader import ImageReader
3 | from .utils import decode_labels, inv_preprocess, prepare_label
4 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/data/base_data_loader.py:
--------------------------------------------------------------------------------
1 | class BaseDataLoader():
2 | def __init__(self):
3 | pass
4 |
5 | def initialize(self, opt):
6 | self.opt = opt
7 | pass
8 |
9 | def load_data(self):
10 | return None
11 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/data/data_loader.py:
--------------------------------------------------------------------------------
1 | def CreateDataLoader(opt):
2 | from data.custom_dataset_data_loader import CustomDatasetDataLoader
3 | data_loader = CustomDatasetDataLoader()
4 | print(data_loader.name())
5 | data_loader.initialize(opt)
6 | return data_loader
7 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/models/models.py:
--------------------------------------------------------------------------------
1 | def create_model(opt):
2 | model = None
3 | print(opt.model)
4 | if opt.model == 'pix2pix':
5 | assert(opt.dataset_mode.find('aligned') != -1)
6 | from .pix2pix_model import Pix2PixModel
7 | model = Pix2PixModel()
8 | elif opt.model == 'test':
9 | assert(opt.dataset_mode.find('single') != -1)
10 | from .test_model import TestModel
11 | model = TestModel()
12 | else:
13 | raise ValueError("Model [%s] not recognized." % opt.model)
14 | model.initialize(opt)
15 | print("model [%s] was created" % (model.name()))
16 | return model
17 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | WSHP: Weakly and Semi Supervised Human Body Part Parsing via Pose-Guided Knowledge Transfer
2 |
3 | SOFTWARE LICENSE AGREEMENT
4 | ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
5 |
6 | BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
7 |
8 | This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Shanghai Jiao Tong University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
9 |
10 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/options/test_options.py:
--------------------------------------------------------------------------------
1 | from .base_options import BaseOptions
2 |
3 |
4 | class TestOptions(BaseOptions):
5 | def initialize(self):
6 | BaseOptions.initialize(self)
7 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
8 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
9 | self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
10 | self.parser.add_argument('--phase', type=str, default='test', help='train, test, etc')
11 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
12 | self.parser.add_argument('--how_many', type=int, default=float("inf"), help='how many test images to run')
13 | self.isTrain = False
14 |
--------------------------------------------------------------------------------
/parsing_network/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Vladimir Nekrasov
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/data_generation/pose2label.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | pascal_pose_file_root="/path/to/pascal_pose_file.csv"
3 | pascal_mask_img_dir="/path/to/pascal_mask_img"
4 | origin_img_root="/path/to/origin_img"
5 | json_file_root="/path/to/pose_json_file"
6 | crop_output_path="/path/to/output/cropped_img_and_prior"
7 | experiment_name="exp1"
8 | merge_output_path="/path/to/output/merged_parsing_label"
9 | overlay_output_path="/path/to/output/overlayed_image"
10 |
11 | python crop_pose_and_generate_testing_prior.py --PASCALPoseFileRoot $pascal_pose_file_root --PASCALMaskImgDir $pascal_mask_img_dir --n 3 --k 3 --aug 0.25 --origin_img_root $origin_img_root --json_file_root $json_file_root --outputDir $crop_output_path
12 | cd refinement_network
13 | python3 test.py --dataroot $crop_output_path --dataset_mode single --model test --output_nc 1 --name $experiment_name --which_epoch latest
14 | cd ..
15 | python merge_parsing_result.py --outputDir $merge_output_path --parsing_root ./refinement_network/results/${experiment_name}/test_latest/images --origin_img_root $origin_img_root --json_file_root $json_file_root --aug 0.25
16 | python overlay.py --origin_img_root $origin_img_root --parsing_img_root $merge_output_path --outputDir $overlay_output_path
17 |
--------------------------------------------------------------------------------
/data_generation/demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | pascal_pose_file_root="./examples/pascalPose.csv"
3 | pascal_mask_img_dir="./examples/pascal_mask"
4 | origin_img_root="./examples/origin_images"
5 | json_file_root="./examples/examples.json"
6 | crop_output_path="./examples/outputs/crop_output"
7 | experiment_name="pretrained_model"
8 | merge_output_path="./examples/outputs/merge_output"
9 | overlay_output_path="./examples/outputs/overlay_output"
10 |
11 | python crop_pose_and_generate_testing_prior.py --PASCALPoseFileRoot $pascal_pose_file_root --PASCALMaskImgDir $pascal_mask_img_dir --n 3 --k 3 --aug 0.25 --origin_img_root $origin_img_root --json_file_root $json_file_root --outputDir $crop_output_path
12 | cd refinement_network
13 | python3 test.py --dataroot ../$crop_output_path --dataset_mode single --model test --output_nc 1 --name $experiment_name --which_epoch latest --checkpoints_dir ../examples --results_dir ../examples
14 | cd ..
15 | python merge_parsing_result.py --outputDir $merge_output_path --parsing_root ./examples/${experiment_name}/test_latest/images --origin_img_root $origin_img_root --json_file_root $json_file_root --aug 0.25
16 | python overlay.py --origin_img_root $origin_img_root --parsing_img_root $merge_output_path --outputDir $overlay_output_path
--------------------------------------------------------------------------------
/data_generation/refinement_network/util/image_pool.py:
--------------------------------------------------------------------------------
1 | import random
2 | import torch
3 | from torch.autograd import Variable
4 |
5 |
6 | class ImagePool():
7 | def __init__(self, pool_size):
8 | self.pool_size = pool_size
9 | if self.pool_size > 0:
10 | self.num_imgs = 0
11 | self.images = []
12 |
13 | def query(self, images):
14 | if self.pool_size == 0:
15 | return Variable(images)
16 | return_images = []
17 | for image in images:
18 | image = torch.unsqueeze(image, 0)
19 | if self.num_imgs < self.pool_size:
20 | self.num_imgs = self.num_imgs + 1
21 | self.images.append(image)
22 | return_images.append(image)
23 | else:
24 | p = random.uniform(0, 1)
25 | if p > 0.5:
26 | random_id = random.randint(0, self.pool_size - 1)
27 | tmp = self.images[random_id].clone()
28 | self.images[random_id] = image
29 | return_images.append(tmp)
30 | else:
31 | return_images.append(image)
32 | return_images = Variable(torch.cat(return_images, 0))
33 | return return_images
34 |
--------------------------------------------------------------------------------
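A minimal usage sketch of the pool defined above, run from the `refinement_network` directory; the pool size and tensor shape are illustrative, not values taken from the training options:

```python
import torch
from util.image_pool import ImagePool

pool = ImagePool(pool_size=50)             # keeps up to 50 previously generated images
fake_batch = torch.randn(4, 1, 256, 256)   # stand-in for a batch of generator outputs
mixed = pool.query(fake_batch)             # each image is either returned as-is or swapped with a stored one
print(mixed.size())                        # (4, 1, 256, 256)
```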
/data_generation/refinement_network/data/custom_dataset_data_loader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data
2 | from data.base_data_loader import BaseDataLoader
3 |
4 |
5 | def CreateDataset(opt):
6 | dataset = None
7 | if opt.dataset_mode == 'single':
8 | from data.single_dataset import SingleDataset
9 | dataset = SingleDataset()
10 | elif opt.dataset_mode == 'aligned':
11 | from data.aligned_dataset import AlignedDataset
12 | dataset = AlignedDataset()
13 | else:
14 | raise ValueError("Dataset [%s] not recognized." % opt.dataset_mode)
15 |
16 | print("dataset [%s] was created" % (dataset.name()))
17 | dataset.initialize(opt)
18 | return dataset
19 |
20 |
21 | class CustomDatasetDataLoader(BaseDataLoader):
22 | def name(self):
23 | return 'CustomDatasetDataLoader'
24 |
25 | def initialize(self, opt):
26 | BaseDataLoader.initialize(self, opt)
27 | self.dataset = CreateDataset(opt)
28 | self.dataloader = torch.utils.data.DataLoader(
29 | self.dataset,
30 | batch_size=opt.batchSize,
31 | shuffle=not opt.serial_batches,
32 | num_workers=int(opt.nThreads))
33 |
34 | def load_data(self):
35 | return self
36 |
37 | def __len__(self):
38 | return min(len(self.dataset), self.opt.max_dataset_size)
39 |
40 | def __iter__(self):
41 | for i, data in enumerate(self.dataloader):
42 | if i >= self.opt.max_dataset_size:
43 | break
44 | yield data
45 |
--------------------------------------------------------------------------------
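Normally this loader is built from the parsed command-line options in `train.py`/`test.py`. The sketch below hand-builds a minimal options object instead and is meant to be run from the `refinement_network` directory; only the attributes read by the loader and by `SingleDataset` are filled in, and the values (the dataset path in particular) are placeholders:

```python
from argparse import Namespace
from data.data_loader import CreateDataLoader

opt = Namespace(dataset_mode='single',
                dataroot='/path/to/dataset',     # must contain img/ and prior/ subfolders
                batchSize=1, serial_batches=True, nThreads=1,
                max_dataset_size=float('inf'), fineSize=256,
                which_direction='AtoB', input_nc=6, output_nc=1)

data_loader = CreateDataLoader(opt)
print('#samples =', len(data_loader))
for batch in data_loader.load_data():
    print(batch['A'].size(), batch['A_paths'])   # (1, 6, 256, 256): RGB image + RGB prior
    break
```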
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Weakly and Semi Supervised Human Body Part Parsing via Pose-Guided Knowledge Transfer
3 |
4 | [[arXiv](https://arxiv.org/abs/1805.04310)]
5 |
6 | Transferring human body part parsing labels to raw images by exploiting the anatomical similarity. Some transferred results:
7 |
8 |
9 | ![Some transferred results](imgs/transferred.jpg)
10 |
11 |
12 | These results are used as extra training samples for the parsing network and can improve the part segmentation results:
13 |
14 |
15 | ![Comparison of part segmentation results](imgs/comparison.gif)
16 |
17 |
18 |
19 | ## Getting Started
20 |
21 | #### Demo video
22 |
23 | Check out our demo video [here](https://youtu.be/nDqnMpE6b8s).
24 |
25 | #### Parsing Network
26 |
27 | Check out `parsing_network` for the training/testing/demo code of our parsing network.
28 |
29 | #### Data generation
30 |
31 | Check out `data_generation` for the code that uses keypoint similarity to transfer parsing knowledge and generate synthetic training labels.
32 |
33 | ## Feedback
34 |
35 | If you run into any problems while using the code, please open an issue.
36 |
37 | ## Citation
38 | If you use this code for your research, please cite our paper:
39 |
40 | ```
41 | @article{fang2018wshp,
42 | title={Weakly and Semi Supervised Human Body Part Parsing via Pose-Guided Knowledge Transfer},
43 | author={Fang, Hao-Shu and Lu, Guansong and Fang, Xiaolin and Xie, Jianwen and Tai, Yu-Wing and Lu, Cewu},
44 | journal={CVPR},
45 | year={2018}
46 | }
47 | ```
48 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | from options.test_options import TestOptions
3 | from data.data_loader import CreateDataLoader
4 | from models.models import create_model
5 | from util.visualizer import Visualizer
6 | from util import html
7 | from collections import OrderedDict
8 |
9 | opt = TestOptions().parse()
10 | opt.nThreads = 1 # test code only supports nThreads = 1
11 | opt.batchSize = 1 # test code only supports batchSize = 1
12 | opt.serial_batches = True # no shuffle
13 | opt.no_flip = True # no flip
14 |
15 | data_loader = CreateDataLoader(opt)
16 | dataset = data_loader.load_data()
17 | model = create_model(opt)
18 | visualizer = Visualizer(opt)
19 | # create website
20 | web_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch))
21 | webpage = html.HTML(web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' % (opt.name, opt.phase, opt.which_epoch))
22 | # test
23 | if opt.output_nc == 1:
24 | save_list = ['fake_B_postprocessed', 'fake_B_color']
25 | else:
26 | save_list = ['fake_B']
27 |
28 | for i, data in enumerate(dataset):
29 | if i >= opt.how_many:
30 | break
31 | model.set_input(data)
32 | model.test()
33 | visuals = model.get_current_visuals()
34 | visuals_selected = OrderedDict()
35 | for key in save_list:
36 | visuals_selected[key] = visuals[key]
37 | img_path = model.get_image_paths()
38 | print('%04d: process image... %s' % (i, img_path))
39 | visualizer.save_images(webpage, visuals_selected, img_path, aspect_ratio=opt.aspect_ratio)
40 |
41 | webpage.save()
42 |
--------------------------------------------------------------------------------
/parsing_network/kaffe/caffe/resolver.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | SHARED_CAFFE_RESOLVER = None
4 |
5 | class CaffeResolver(object):
6 | def __init__(self):
7 | self.import_caffe()
8 |
9 | def import_caffe(self):
10 | self.caffe = None
11 | try:
12 | # Try to import PyCaffe first
13 | import caffe
14 | self.caffe = caffe
15 | except ImportError:
16 | # Fall back to the protobuf implementation
17 | from . import caffepb
18 | self.caffepb = caffepb
19 | show_fallback_warning()
20 | if self.caffe:
21 | # Use the protobuf code from the imported distribution.
22 | # This way, Caffe variants with custom layers will work.
23 | self.caffepb = self.caffe.proto.caffe_pb2
24 | self.NetParameter = self.caffepb.NetParameter
25 |
26 | def has_pycaffe(self):
27 | return self.caffe is not None
28 |
29 | def get_caffe_resolver():
30 | global SHARED_CAFFE_RESOLVER
31 | if SHARED_CAFFE_RESOLVER is None:
32 | SHARED_CAFFE_RESOLVER = CaffeResolver()
33 | return SHARED_CAFFE_RESOLVER
34 |
35 | def has_pycaffe():
36 | return get_caffe_resolver().has_pycaffe()
37 |
38 | def show_fallback_warning():
39 | msg = '''
40 | ------------------------------------------------------------
41 | WARNING: PyCaffe not found!
42 | Falling back to a pure protocol buffer implementation.
43 | * Conversions will be drastically slower.
44 | * This backend is UNTESTED!
45 | ------------------------------------------------------------
46 |
47 | '''
48 | sys.stderr.write(msg)
49 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/data/single_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | from data.base_dataset import BaseDataset, get_transform
3 | from data.image_folder import make_dataset
4 | from PIL import Image
5 | import torchvision.transforms as transforms
6 | import torch
7 |
8 |
9 | class SingleDataset(BaseDataset):
10 | def initialize(self, opt):
11 | self.opt = opt
12 | self.root = opt.dataroot
13 | self.dir_A = os.path.join(opt.dataroot)
14 | self.dir_img = os.path.join(self.dir_A, 'img')
15 | self.dir_prior = os.path.join(self.dir_A, 'prior')
16 |
17 | self.img_paths = sorted(make_dataset(self.dir_img))
18 | self.prior_paths = sorted(make_dataset(self.dir_prior))
19 |
20 | def __getitem__(self, index):
21 | img_path = self.img_paths[index]
22 | img = Image.open(img_path).convert('RGB')
23 | img = img.resize((self.opt.fineSize, self.opt.fineSize), Image.BICUBIC)
24 | img = transforms.ToTensor()(img)
25 |
26 | prior_path = self.prior_paths[index]
27 | prior = Image.open(prior_path).convert('RGB')
28 | prior = prior.resize((self.opt.fineSize, self.opt.fineSize), Image.BICUBIC)
29 | prior = transforms.ToTensor()(prior)
30 |
31 | img = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(img)
32 | prior = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(prior)
33 |
34 | if self.opt.which_direction == 'BtoA':
35 | input_nc = self.opt.output_nc
36 | else:
37 | input_nc = self.opt.input_nc
38 |
39 | A = torch.cat([img, prior], dim=0)
40 |
41 | return {'A': A, 'A_paths': img_path}
42 |
43 | def __len__(self):
44 | return len(self.img_paths)
45 |
46 | def name(self):
47 | return 'SingleDataset'
48 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/data/base_dataset.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 | from PIL import Image
3 | import torchvision.transforms as transforms
4 |
5 |
6 | class BaseDataset(data.Dataset):
7 | def __init__(self):
8 | super(BaseDataset, self).__init__()
9 |
10 | def name(self):
11 | return 'BaseDataset'
12 |
13 | def initialize(self, opt):
14 | pass
15 |
16 |
17 | def get_transform(opt):
18 | transform_list = []
19 | if opt.resize_or_crop == 'resize_and_crop':
20 | osize = [opt.loadSize, opt.loadSize]
21 | transform_list.append(transforms.Scale(osize, Image.BICUBIC))
22 | transform_list.append(transforms.RandomCrop(opt.fineSize))
23 | elif opt.resize_or_crop == 'crop':
24 | transform_list.append(transforms.RandomCrop(opt.fineSize))
25 | elif opt.resize_or_crop == 'scale_width':
26 | transform_list.append(transforms.Lambda(
27 | lambda img: __scale_width(img, opt.fineSize)))
28 | elif opt.resize_or_crop == 'scale_width_and_crop':
29 | transform_list.append(transforms.Lambda(
30 | lambda img: __scale_width(img, opt.loadSize)))
31 | transform_list.append(transforms.RandomCrop(opt.fineSize))
32 |
33 | if opt.isTrain and not opt.no_flip:
34 | transform_list.append(transforms.RandomHorizontalFlip())
35 |
36 | transform_list += [transforms.ToTensor(),
37 | transforms.Normalize((0.5, 0.5, 0.5),
38 | (0.5, 0.5, 0.5))]
39 | return transforms.Compose(transform_list)
40 |
41 |
42 | def __scale_width(img, target_width):
43 | ow, oh = img.size
44 | if (ow == target_width):
45 | return img
46 | w = target_width
47 | h = int(target_width * oh / ow)
48 | return img.resize((w, h), Image.BICUBIC)
49 |
--------------------------------------------------------------------------------
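A quick sketch of `get_transform` with a hand-built options object, run from the `refinement_network` directory. Only the fields the function reads are set, the sizes are illustrative, and `transforms.Scale` is the old torchvision name that was later renamed `Resize`:

```python
from argparse import Namespace
from PIL import Image
from data.base_dataset import get_transform

opt = Namespace(resize_or_crop='resize_and_crop', loadSize=286, fineSize=256,
                isTrain=True, no_flip=False)
transform = get_transform(opt)

img = Image.new('RGB', (320, 240))
out = transform(img)    # resized to 286x286, randomly cropped to 256x256, maybe flipped, normalized
print(out.size())       # (3, 256, 256)
```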
/data_generation/overlay.py:
--------------------------------------------------------------------------------
1 | '''
2 | Overlay the original image with the painted parsing label.
3 |
4 | >>> python overlay.py --origin_img_root /path/to/origin_img --parsing_img_root /path/to/parsing_img --outputDir /path/to/output
5 | >>>
6 | '''
7 |
8 | import os
9 | import numpy as np
10 | import cv2
11 | import argparse
12 |
13 | parser = argparse.ArgumentParser()
14 | parser.add_argument("--origin_img_root", help="path to origin img")
15 | parser.add_argument("--parsing_img_root", help="path to parsing img")
16 | parser.add_argument("--outputDir", help="where to put output files")
17 | parser.add_argument("--factor", type=int, default=1, help='multiply factor')
18 | parser.add_argument("--aug", type=float, default=0, help='augmentation factor for crop')
19 | a = parser.parse_args()
20 |
21 | origin_img_root = a.origin_img_root
22 | parsing_img_root= a.parsing_img_root
23 | output_path = a.outputDir
24 |
25 | body_part_color = np.array([
26 | [0, 0, 0],
27 | [128, 0, 0],
28 | [0, 128, 0],
29 | [128, 128, 0],
30 | [0, 0, 128],
31 | [128, 0, 128],
32 | [0, 128, 128],
33 | [128, 128, 128],
34 | [64, 0, 0],
35 | [192, 0, 0],
36 | [64, 128, 0]],
37 | dtype=np.uint8)
38 |
39 | def paint(mask_img):
40 | assert(len(mask_img.shape) == 2)
41 | return body_part_color[mask_img]
42 |
43 | def overlay(origin_img, parsing_img):
44 | overlay_img = origin_img*0.7 + parsing_img[:,:,[2,1,0]]*0.9
45 | overlay_img = (overlay_img > 255) * 255 + overlay_img * (overlay_img <= 255)
46 | return overlay_img
47 |
48 | if not os.path.exists(output_path):
49 | os.makedirs(output_path)
50 |
51 | for root, dirs, files in os.walk(parsing_img_root):
52 | for file in files:
53 | origin_img = cv2.imread(os.path.join(origin_img_root, file[0:len(file)-4]+'.jpg'))
54 | parsing_img = cv2.imread(os.path.join(root, file), 0)
55 | overlay_img = overlay(origin_img, paint(parsing_img))
56 |
57 | cv2.imwrite(os.path.join(output_path, file), overlay_img)
58 | print(file)
59 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/models/base_model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 |
4 |
5 | class BaseModel():
6 | def name(self):
7 | return 'BaseModel'
8 |
9 | def initialize(self, opt):
10 | self.opt = opt
11 | self.gpu_ids = opt.gpu_ids
12 | self.isTrain = opt.isTrain
13 | self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor
14 | self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
15 |
16 | def set_input(self, input):
17 | self.input = input
18 |
19 | def forward(self):
20 | pass
21 |
22 | # used in test time, no backprop
23 | def test(self):
24 | pass
25 |
26 | def get_image_paths(self):
27 | pass
28 |
29 | def optimize_parameters(self):
30 | pass
31 |
32 | def get_current_visuals(self):
33 | return self.input
34 |
35 | def get_current_errors(self):
36 | return {}
37 |
38 | def save(self, label):
39 | pass
40 |
41 | # helper saving function that can be used by subclasses
42 | def save_network(self, network, network_label, epoch_label, gpu_ids):
43 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
44 | save_path = os.path.join(self.save_dir, save_filename)
45 | torch.save(network.cpu().state_dict(), save_path)
46 | if len(gpu_ids) and torch.cuda.is_available():
47 | network.cuda(gpu_ids[0])
48 |
49 | # helper loading function that can be used by subclasses
50 | def load_network(self, network, network_label, epoch_label):
51 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
52 | save_path = os.path.join(self.save_dir, save_filename)
53 | network.load_state_dict(torch.load(save_path))
54 |
55 | # update learning rate (called once every epoch)
56 | def update_learning_rate(self):
57 | for scheduler in self.schedulers:
58 | scheduler.step()
59 | lr = self.optimizers[0].param_groups[0]['lr']
60 | print('learning rate = %.7f' % lr)
61 |
--------------------------------------------------------------------------------
/data_generation/examples/examples.json:
--------------------------------------------------------------------------------
1 | {"COCO_train2014_000000131780.jpg": {"version": 0.1, "bodies": [{"joints": [317, 294, 0.649612545967, 321, 308, 0.671884775162, 317, 314, 0.750793337822, 315, 330, 0.698100686073, 312, 343, 0.746511995792, 326, 313, 0.782322406769, 327, 330, 0.731025755405, 325, 343, 0.769655823708, 317, 344, 0.579411387444, 318, 365, 0.561899662018, 322, 384, 0.496061146259, 322, 344, 0.657607674599, 317, 365, 0.71800494194, 323, 384, 0.690972328186, 321, 314, 0.271296024323]}, {"joints": [144, 286, 0.677381157875, 143, 298, 0.671079695225, 151, 307, 0.833187520504, 154, 322, 0.788151741028, 144, 319, 0.181723922491, 131, 305, 0.868985891342, 128, 320, 0.708187639713, 138, 318, 0.19234508276, 146, 341, 0.685109257698, 146, 365, 0.792463898659, 146, 385, 0.816615819931, 135, 341, 0.670794785023, 132, 365, 0.780540108681, 128, 385, 0.814137935638, 141, 306, 0.177925795317]}, {"joints": [405, 284, 0.52686548233, 405, 298, 0.623795926571, 414, 306, 0.736892104149, 418, 326, 0.526854753494, 418, 344, 0.315674066544, 399, 305, 0.832540273666, 392, 326, 0.757319688797, 380, 339, 0.743686616421, 409, 346, 0.524510025978, 399, 371, 0.722547531128, 399, 395, 0.832679629326, 402, 345, 0.585395276546, 403, 372, 0.775037169456, 419, 390, 0.778844773769, 406, 306, 0.234650954604]}, {"joints": [369, 295, 0.6036901474, 368, 310, 0.603479743004, 376, 315, 0.740995645523, 380, 329, 0.485697984695, 370, 334, 0.175854563713, 361, 316, 0.79310631752, 354, 333, 0.615259408951, 351, 339, 0.255796700716, 372, 348, 0.525605559349, 373, 369, 0.676989197731, 382, 389, 0.740746855736, 360, 347, 0.509381949902, 356, 370, 0.656241238117, 357, 393, 0.819907128811, 369, 315, 0.233699262142]}]}, "COCO_train2014_000000036827.jpg": {"version": 0.1, "bodies": [{"joints": [361, 96, 0.664693593979, 367, 159, 0.644438028336, 334, 173, 0.775427997112, 323, 247, 0.697471022606, 304, 298, 0.677327096462, 404, 182, 0.758776426315, 413, 265, 0.746202170849, 391, 326, 0.686128020287, 334, 322, 0.674754321575, 330, 407, 0.811001181602, 334, 479, 0.818583607674, 374, 326, 0.647439479828, 377, 421, 0.769766330719, 430, 511, 0.744681596756, 371, 176, 0.182139545679]}]}}
--------------------------------------------------------------------------------
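The JSON above maps each image name to the detected people; every `joints` entry is a flat `[x, y, score, x, y, score, ...]` list covering 15 keypoints per person. A minimal sketch for reading it back, run from the `data_generation` directory:

```python
import json
import numpy as np

with open('examples/examples.json') as f:
    poses = json.load(f)

for img_name, record in poses.items():
    for pid, body in enumerate(record['bodies']):
        joints = np.array(body['joints']).reshape(-1, 3)   # (15, 3): x, y, confidence
        print(img_name, 'person', pid, 'mean confidence %.2f' % joints[:, 2].mean())
```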
/data_generation/refinement_network/util/html.py:
--------------------------------------------------------------------------------
1 | import dominate
2 | from dominate.tags import *
3 | import os
4 |
5 |
6 | class HTML:
7 | def __init__(self, web_dir, title, reflesh=0):
8 | self.title = title
9 | self.web_dir = web_dir
10 | self.img_dir = os.path.join(self.web_dir, 'images')
11 | if not os.path.exists(self.web_dir):
12 | os.makedirs(self.web_dir)
13 | if not os.path.exists(self.img_dir):
14 | os.makedirs(self.img_dir)
15 | # print(self.img_dir)
16 |
17 | self.doc = dominate.document(title=title)
18 | if reflesh > 0:
19 | with self.doc.head:
20 | meta(http_equiv="refresh", content=str(reflesh))
21 |
22 | def get_image_dir(self):
23 | return self.img_dir
24 |
25 | def add_header(self, str):
26 | with self.doc:
27 | h3(str)
28 |
29 | def add_table(self, border=1):
30 | self.t = table(border=border, style="table-layout: fixed;")
31 | self.doc.add(self.t)
32 |
33 | def add_images(self, ims, txts, links, width=400):
34 | self.add_table()
35 | with self.t:
36 | with tr():
37 | for im, txt, link in zip(ims, txts, links):
38 | with td(style="word-wrap: break-word;", halign="center", valign="top"):
39 | with p():
40 | with a(href=os.path.join('images', link)):
41 | img(style="width:%dpx" % width, src=os.path.join('images', im))
42 | br()
43 | p(txt)
44 |
45 | def save(self):
46 | html_file = '%s/index.html' % self.web_dir
47 | f = open(html_file, 'wt')
48 | f.write(self.doc.render())
49 | f.close()
50 |
51 |
52 | if __name__ == '__main__':
53 | html = HTML('web/', 'test_html')
54 | html.add_header('hello world')
55 |
56 | ims = []
57 | txts = []
58 | links = []
59 | for n in range(4):
60 | ims.append('image_%d.png' % n)
61 | txts.append('text_%d' % n)
62 | links.append('image_%d.png' % n)
63 | html.add_images(ims, txts, links)
64 | html.save()
65 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/datasets/generate_pascal_training_prior.py:
--------------------------------------------------------------------------------
1 | '''
2 | We use the Pascal dataset, which has both keypoint and segmentation annotations, to generate priors for other datasets that only have keypoint information.
3 | In order to train our refinement network, we need to generate a prior for each Pascal image; that is what this script does.
4 |
5 | >>> python generate_pascal_training_prior.py --PASCALPoseFileRoot /path/to/pascal_pose_file.csv --PASCALMaskImgDir /path/to/pascal_mask_img --outputDir /path/to/output --n 5 --k 3
6 | >>>
7 | '''
8 |
9 | import argparse
10 | from generate_prior_util import *
11 |
12 |
13 | parser = argparse.ArgumentParser()
14 | parser.add_argument("--PASCALPoseFileRoot", help="path to PASCAL pose file")
15 | parser.add_argument("--PASCALMaskImgDir", help="path to PASCAL mask images")
16 | parser.add_argument("--outputDir", help="where to put output files")
17 | parser.add_argument("--n", type=int, default=5, help="number of close images picked first time")
18 | parser.add_argument("--k", type=int, default=3, help="number of close images picked for prior generation in n picked images")
19 | opt = parser.parse_args()
20 |
21 | # load PASCAL pose
22 | pascal_poses, pascal_img_names, pascal_pose_dict = load_pascal_pose(opt.PASCALPoseFileRoot)
23 |
24 | if not os.path.exists(opt.outputDir):
25 | os.makedirs(opt.outputDir)
26 |
27 | for i in range(len(pascal_img_names)):
28 | pascal_name = pascal_img_names[i]
29 | print('processing', pascal_name)
30 | pascal_mask_img = cv2.imread(os.path.join(opt.PASCALMaskImgDir, pascal_name + ".png"), 0)
31 | if not os.path.exists(os.path.join(opt.outputDir, pascal_name)):
32 | os.makedirs(os.path.join(opt.outputDir, pascal_name))
33 | pascal_average_parsing = generate_prior_single_person([0, 0, pascal_mask_img.shape[1] - 1, pascal_mask_img.shape[0] - 1],
34 | pascal_pose_dict[pascal_name], opt.PASCALMaskImgDir, pascal_poses,
35 | pascal_img_names, pascal_pose_dict, opt.n, opt.k, exclude_self=True, save_dir=os.path.join(opt.outputDir, pascal_name))
36 | pascal_average_parsing = pascal_average_parsing[:, :, [2, 1, 0]]
37 | cv2.imwrite(os.path.join(opt.outputDir, pascal_name + ".png"), pascal_average_parsing)
38 | print(pascal_name, i)
39 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/data/image_folder.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Code from
3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
4 | # Modified the original code so that it also loads images from the current
5 | # directory as well as the subdirectories
6 | ###############################################################################
7 |
8 | import torch.utils.data as data
9 |
10 | from PIL import Image
11 | import os
12 | import os.path
13 |
14 | IMG_EXTENSIONS = [
15 | '.jpg', '.JPG', '.jpeg', '.JPEG',
16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
17 | ]
18 |
19 |
20 | def is_image_file(filename):
21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
22 |
23 |
24 | def make_dataset(dir):
25 | images = []
26 | assert os.path.isdir(dir), '%s is not a valid directory' % dir
27 |
28 | for root, _, fnames in sorted(os.walk(dir)):
29 | for fname in fnames:
30 | if is_image_file(fname):
31 | path = os.path.join(root, fname)
32 | images.append(path)
33 |
34 | return images
35 |
36 |
37 | def default_loader(path):
38 | return Image.open(path).convert('RGB')
39 |
40 |
41 | class ImageFolder(data.Dataset):
42 |
43 | def __init__(self, root, transform=None, return_paths=False,
44 | loader=default_loader):
45 | imgs = make_dataset(root)
46 | if len(imgs) == 0:
47 | raise(RuntimeError("Found 0 images in: " + root + "\n"
48 | "Supported image extensions are: " +
49 | ",".join(IMG_EXTENSIONS)))
50 |
51 | self.root = root
52 | self.imgs = imgs
53 | self.transform = transform
54 | self.return_paths = return_paths
55 | self.loader = loader
56 |
57 | def __getitem__(self, index):
58 | path = self.imgs[index]
59 | img = self.loader(path)
60 | if self.transform is not None:
61 | img = self.transform(img)
62 | if self.return_paths:
63 | return img, path
64 | else:
65 | return img
66 |
67 | def __len__(self):
68 | return len(self.imgs)
69 |
--------------------------------------------------------------------------------
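A minimal usage sketch of the `ImageFolder` dataset above, run from the `refinement_network` directory; the image directory is a placeholder and just needs to contain at least one file with a supported extension:

```python
import torchvision.transforms as transforms
from data.image_folder import ImageFolder

dataset = ImageFolder('/path/to/images',
                      transform=transforms.ToTensor(),
                      return_paths=True)
img, path = dataset[0]
print(len(dataset), path, img.size())
```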
/data_generation/refinement_network/train.py:
--------------------------------------------------------------------------------
1 | import time
2 | from options.train_options import TrainOptions
3 | from data.data_loader import CreateDataLoader
4 | from models.models import create_model
5 | from util.visualizer import Visualizer
6 |
7 | opt = TrainOptions().parse()
8 | data_loader = CreateDataLoader(opt)
9 | dataset = data_loader.load_data()
10 | dataset_size = len(data_loader)
11 | print('#training images = %d' % dataset_size)
12 |
13 | model = create_model(opt)
14 | visualizer = Visualizer(opt)
15 | total_steps = 0
16 |
17 | for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
18 | epoch_start_time = time.time()
19 | iter_data_time = time.time()
20 | epoch_iter = 0
21 |
22 | for i, data in enumerate(dataset):
23 | iter_start_time = time.time()
24 | if total_steps % opt.print_freq == 0:
25 | t_data = iter_start_time - iter_data_time
26 | visualizer.reset()
27 | total_steps += opt.batchSize
28 | epoch_iter += opt.batchSize
29 | model.set_input(data)
30 | model.optimize_parameters()
31 |
32 | if total_steps % opt.display_freq == 0:
33 | save_result = total_steps % opt.update_html_freq == 0
34 | visualizer.display_current_results(model.get_current_visuals(), epoch, save_result)
35 |
36 | if total_steps % opt.print_freq == 0:
37 | errors = model.get_current_errors()
38 | t = (time.time() - iter_start_time) / opt.batchSize
39 | visualizer.print_current_errors(epoch, epoch_iter, errors, t, t_data)
40 | if opt.display_id > 0:
41 | visualizer.plot_current_errors(epoch, float(epoch_iter) / dataset_size, opt, errors)
42 |
43 | if total_steps % opt.save_latest_freq == 0:
44 | print('saving the latest model (epoch %d, total_steps %d)' %
45 | (epoch, total_steps))
46 | model.save('latest')
47 |
48 | iter_data_time = time.time()
49 | if epoch % opt.save_epoch_freq == 0:
50 | print('saving the model at the end of epoch %d, iters %d' %
51 | (epoch, total_steps))
52 | model.save('latest')
53 | model.save(epoch)
54 |
55 | print('End of epoch %d / %d \t Time Taken: %d sec' %
56 | (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))
57 | model.update_learning_rate()
58 |
--------------------------------------------------------------------------------
/data_generation/pick_full_person.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import numpy as np
4 | from tqdm import tqdm
5 | import matplotlib.pyplot as plt
6 | import os
7 | from PIL import Image
8 | import json
9 | import shutil
10 | import argparse
11 |
12 | def parse_args():
13 | """Parse input arguments."""
14 | parser = argparse.ArgumentParser(description='')
15 | parser.add_argument('--outputpath',dest='outputpath', help='path of output', default="")
16 | parser.add_argument('--inputpath',dest='inputpath', help='path of inputpath', default="")
17 | args = parser.parse_args()
18 | return args
19 |
20 | def filter_pose(inputpath, outputpath, imgname):
21 | save = True
22 | for pid in range(len(rmpe_results[imgname])):
23 | pose = np.array(rmpe_results[imgname][pid]['keypoints']).reshape(-1,3)[:,:3]
24 | for idx_c in range(16):
25 | if (pose[idx_c,2]) < 0.15:
26 | save = False
27 | break
28 | if save == False:
29 | break
30 | if save == False:
31 | return False
32 | return True
33 |
34 |
35 | if __name__ == '__main__':
36 | args = parse_args()
37 | outputpath = args.outputpath
38 | inputpath = args.inputpath
39 | jsonpath = os.path.join(args.outputpath,"POSE/alpha-pose-results-forvis.json")
40 |
41 | result3={}
42 | with open(jsonpath) as f:
43 | rmpe_results = json.load(f)
44 | for imgname in tqdm(rmpe_results.keys()):
45 | if filter_pose(inputpath, outputpath, imgname):
46 | for pid in range(len(rmpe_results[imgname])):
47 | if imgname not in result3.keys():
48 | result3[imgname]={}
49 | result3[imgname]['version']=0.1
50 | result3[imgname]['bodies']=[]
51 | tmp={'joints':[]}
52 | indexarr=[27,24,36,33,30,39,42,45,6,3,0,9,12,15,21]  # start offsets (x position) of the 15 kept keypoints in the flat [x, y, score, ...] list
53 | for i in indexarr:
54 | tmp['joints'].append(rmpe_results[imgname][pid]['keypoints'][i])
55 | tmp['joints'].append(rmpe_results[imgname][pid]['keypoints'][i+1])
56 | tmp['joints'].append(rmpe_results[imgname][pid]['keypoints'][i+2])
57 | result3[imgname]['bodies'].append(tmp)
58 | with open("full-person.json",'w') as json_file:
59 | json_file.write(json.dumps(result3))
--------------------------------------------------------------------------------
/data_generation/refinement_network/models/test_model.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Variable
2 | from collections import OrderedDict
3 | import util.util as util
4 | from .base_model import BaseModel
5 | from . import networks
6 |
7 |
8 | class TestModel(BaseModel):
9 | def name(self):
10 | return 'TestModel'
11 |
12 | def initialize(self, opt):
13 | assert(not opt.isTrain)
14 | BaseModel.initialize(self, opt)
15 | self.netG = networks.define_G(opt.input_nc, opt.output_nc,
16 | opt.ngf, opt.which_model_netG,
17 | opt.norm, not opt.no_dropout,
18 | opt.init_type,
19 | self.gpu_ids)
20 | which_epoch = opt.which_epoch
21 | self.load_network(self.netG, 'G', which_epoch)
22 |
23 | print('---------- Networks initialized -------------')
24 | networks.print_network(self.netG)
25 | print('-----------------------------------------------')
26 |
27 | def set_input(self, input):
28 | # we need to use single_dataset mode
29 | input_A = input['A']
30 | if len(self.gpu_ids) > 0:
31 | input_A = input_A.cuda(self.gpu_ids[0], async=True)
32 | self.input_A = input_A
33 | self.image_paths = input['A_paths']
34 |
35 | def test(self):
36 | self.real_A = Variable(self.input_A)
37 | self.fake_B = self.netG(self.real_A)
38 |
39 | # get image paths
40 | def get_image_paths(self):
41 | return self.image_paths
42 |
43 | def get_current_visuals(self):
44 | real_A_img, real_A_prior = util.tensor2im(self.real_A.data)
45 | fake_B = util.tensor2im(self.fake_B.data)
46 | if self.opt.output_nc == 1:
47 | fake_B_postprocessed = util.postprocess_parsing(fake_B, self.isTrain)
48 | fake_B_color = util.paint_color(fake_B_postprocessed)
49 | if self.opt.output_nc == 1:
50 | return OrderedDict([
51 | ('real_A_img', real_A_img),
52 | ('real_A_prior', real_A_prior),
53 | ('fake_B', fake_B),
54 | ('fake_B_postprocessed', fake_B_postprocessed),
55 | ('fake_B_color', fake_B_color)]
56 | )
57 | else:
58 | return OrderedDict([
59 | ('real_A_img', real_A_img),
60 | ('real_A_prior', real_A_prior),
61 | ('fake_B', fake_B)]
62 | )
63 |
--------------------------------------------------------------------------------
/parsing_network/kaffe/shapes.py:
--------------------------------------------------------------------------------
1 | import math
2 | from collections import namedtuple
3 |
4 | from .errors import KaffeError
5 |
6 | TensorShape = namedtuple('TensorShape', ['batch_size', 'channels', 'height', 'width'])
7 |
8 |
9 | def get_filter_output_shape(i_h, i_w, params, round_func):
10 | o_h = (i_h + 2 * params.pad_h - params.kernel_h) / float(params.stride_h) + 1
11 | o_w = (i_w + 2 * params.pad_w - params.kernel_w) / float(params.stride_w) + 1
12 | return (int(round_func(o_h)), int(round_func(o_w)))
13 |
14 |
15 | def get_strided_kernel_output_shape(node, round_func):
16 | assert node.layer is not None
17 | input_shape = node.get_only_parent().output_shape
18 | o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width,
19 | node.layer.kernel_parameters, round_func)
20 | params = node.layer.parameters
21 | has_c_o = hasattr(params, 'num_output')
22 | c = params.num_output if has_c_o else input_shape.channels
23 | return TensorShape(input_shape.batch_size, c, o_h, o_w)
24 |
25 |
26 | def shape_not_implemented(node):
27 | raise NotImplementedError
28 |
29 |
30 | def shape_identity(node):
31 | assert len(node.parents) > 0
32 | return node.parents[0].output_shape
33 |
34 |
35 | def shape_scalar(node):
36 | return TensorShape(1, 1, 1, 1)
37 |
38 |
39 | def shape_data(node):
40 | if node.output_shape:
41 | # Old-style input specification
42 | return node.output_shape
43 | try:
44 | # New-style input specification
45 | return map(int, node.parameters.shape[0].dim)
46 | except:
47 | # We most likely have a data layer on our hands. The problem is,
48 | # Caffe infers the dimensions of the data from the source (eg: LMDB).
49 | # We want to avoid reading datasets here. Fail for now.
50 | # This can be temporarily fixed by transforming the data layer to
51 | # Caffe's "input" layer (as is usually used in the "deploy" version).
52 | # TODO: Find a better solution for this.
53 | raise KaffeError('Cannot determine dimensions of data layer.\n'
54 | 'See comments in function shape_data for more info.')
55 |
56 |
57 | def shape_mem_data(node):
58 | params = node.parameters
59 | return TensorShape(params.batch_size, params.channels, params.height, params.width)
60 |
61 |
62 | def shape_concat(node):
63 | axis = node.layer.parameters.axis
64 | output_shape = None
65 | for parent in node.parents:
66 | if output_shape is None:
67 | output_shape = list(parent.output_shape)
68 | else:
69 | output_shape[axis] += parent.output_shape[axis]
70 | return tuple(output_shape)
71 |
72 |
73 | def shape_convolution(node):
74 | return get_strided_kernel_output_shape(node, math.floor)
75 |
76 |
77 | def shape_pool(node):
78 | return get_strided_kernel_output_shape(node, math.ceil)
79 |
80 |
81 | def shape_inner_product(node):
82 | input_shape = node.get_only_parent().output_shape
83 | return TensorShape(input_shape.batch_size, node.layer.parameters.num_output, 1, 1)
84 |
--------------------------------------------------------------------------------
/parsing_network/README.md:
--------------------------------------------------------------------------------
1 | # Parsing Network
2 |
3 | ## Requirements
4 |
5 | TensorFlow (version >= 1.1.0) needs to be installed before running the scripts.
6 | To install the required python packages (except TensorFlow), run
7 | ```bash
8 | pip install -r requirements.txt
9 | ```
10 | or for a local installation
11 | ```bash
12 | pip install --user -r requirements.txt
13 | ```
14 |
15 | ## Preparation
16 |
17 | To train the network, you first need to download the data from [here](https://pan.baidu.com/s/1ywqpcsvPTsjIY_Slsl9Zhg) (code: 6wqq). It contains data from the original Pascal-Person-Part dataset and our generated data. Our released dataset contains over 150K images, which we believe can facilitate research in the area of human parsing. After downloading the dataset, unzip it into the `dataset/` folder.
18 |
19 | Besides, download the weights pre-trained on the COCO dataset from
20 | [Baidu Pan](https://pan.baidu.com/s/1AoGJbZ4YHEbV0x89K1-8nQ) (code: r0yb)
21 | or
22 | [Gdrive](https://drive.google.com/file/d/1aPb2rilhYestHL_7jJglmCHv7ScIG8MQ/view?usp=sharingand),
23 | and unzip them under the current folder.
24 |
25 | ## Training
26 |
27 | Our training method consists of two steps. First, we train our network on the whole dataset. Then we finetune the model on the original dataset. Note that due to time limitations, we only trained the network on the whole dataset for 10 epochs; there may be further performance gains from training for more epochs. (See the note after the commands for the expected data-list format.)
28 | ```bash
29 | ## Train our network on the whole dataset, model.ckpt-50000 is the pre-trained weights on COCO dataset
30 | python train.py --data-dir ./dataset/ --data-list dataset/train_all.txt --num-epochs 10 --restore-from models/model.ckpt-50000 --not-restore-last --snapshot-dir snapshots-new-fromcoco --random-scale --random-mirror --save-pred-every 50000
31 |
32 | ## Finetune the model on the original dataset
33 | python train.py --data-dir ./dataset/ --data-list dataset/pascal_train.txt --num-epochs 90 --restore-from snapshots-new-fromcoco/model.ckpt-213129 --snapshot-dir snapshots-new-fromcoco-finetune --random-scale --random-mirror --save-pred-every 10000
34 | ```
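
The `--data-list` files are plain-text lists read by `deeplab_resnet/image_reader.py`. As an assumption based on that reader (check the file for the exact convention), each line pairs an image path with its label path, relative to `--data-dir` and separated by a single space; the file names below are purely illustrative:

```
/JPEGImages/2008_000003.jpg /SegmentationPart/2008_000003.png
```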
35 |
36 | It takes about two days to train a model on a single Titan X GPU card.
37 |
38 |
39 | ## Evaluation
40 | We provide a pretrained model in `models/final_model/`.
41 | Run the following command to evaluate on the Pascal-Person-Part dataset; it should achieve 64.3% mean intersection-over-union:
42 | ```bash
43 | python evaluate.py --data-dir ./dataset/ --restore-from ./models/final_model/model.ckpt-19315
44 | ```
45 |
46 | ## Inference
47 |
48 | To perform inference over your own images, use the following command:
49 | ```bash
50 | python inference.py /path/to/img/folder /path/to/model --data_list /path/to/data/list
51 | ```
52 | This will run the forward pass and save the rendered results in the `output/` folder:
53 |
54 |
55 |
56 |
57 |
58 | ## Acknowledgement
59 |
60 | This implementation of [DeepLabV2-ResNet](http://liangchiehchen.com/projects/DeepLabv2_resnet.html) is originally from [tensorflow-deeplab-resnet](https://github.com/DrSleep/tensorflow-deeplab-resnet). Thanks to DrSleep for sharing!
61 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/options/train_options.py:
--------------------------------------------------------------------------------
1 | from .base_options import BaseOptions
2 |
3 |
4 | class TrainOptions(BaseOptions):
5 | def initialize(self):
6 | BaseOptions.initialize(self)
7 | self.parser.add_argument('--display_freq', type=int, default=500, help='frequency of showing training results on screen')
8 | self.parser.add_argument('--display_single_pane_ncols', type=int, default=0, help='if positive, display all images in a single visdom web panel with certain number of images per row.')
9 | self.parser.add_argument('--update_html_freq', type=int, default=500, help='frequency of saving training results to html')
10 | self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console')
11 | self.parser.add_argument('--save_latest_freq', type=int, default=30000, help='frequency of saving the latest results')
12 | self.parser.add_argument('--save_epoch_freq', type=int, default=5, help='frequency of saving checkpoints at the end of epochs')
13 | self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model')
14 |         self.parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count; we save the model by <epoch_count>, <epoch_count>+<save_latest_freq>, ...')
15 | self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc')
16 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
17 | self.parser.add_argument('--niter', type=int, default=200, help='# of iter at starting learning rate')
18 | self.parser.add_argument('--niter_decay', type=int, default=0, help='# of iter to linearly decay learning rate to zero')
19 | self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam')
20 | self.parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam')
21 |         self.parser.add_argument('--no_lsgan', action='store_true', help='do *not* use least squares GAN; if this flag is set, use vanilla GAN')
22 | self.parser.add_argument('--no_gan', action='store_true', help='do *not* use gan loss')
23 |         self.parser.add_argument('--n', type=int, default=5, help='number of prior images available for each training image')
24 |         self.parser.add_argument('--k', type=int, default=3, help='number of prior images averaged to form the input prior')
25 | self.parser.add_argument('--use_l2', action='store_true', help='use l2 loss for netG')
26 | self.parser.add_argument('--shuffle', action='store_true', help='shuffle index for prior')
27 | self.parser.add_argument('--lambda_A', type=float, default=100.0, help='weight for reference loss')
28 | self.parser.add_argument('--pool_size', type=int, default=0, help='the size of image buffer that stores previously generated images')
29 | self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/')
30 | self.parser.add_argument('--lr_policy', type=str, default='lambda', help='learning rate policy: lambda|step|plateau')
31 | self.parser.add_argument('--lr_decay_iters', type=int, default=50, help='multiply by a gamma every lr_decay_iters iterations')
32 |
33 | self.isTrain = True
34 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/util/util.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import torch
3 | import numpy as np
4 | from PIL import Image
5 | import os
6 |
7 |
8 | # Converts a Tensor into a Numpy array
9 | # |imtype|: the desired type of the converted numpy array
10 | def tensor2im(image_tensor, imtype=np.uint8):
11 | image_numpy = image_tensor[0].cpu().float().numpy()
12 | if image_numpy.shape[0] > 3:
13 | image_numpy_1 = image_numpy[0:3, :, :]
14 | image_numpy_2 = image_numpy[3:, :, :]
15 | image_numpy_1 = (np.transpose(image_numpy_1, (1, 2, 0)) + 1) / 2.0 * 255.0
16 | image_numpy_2 = (np.transpose(image_numpy_2, (1, 2, 0)) + 1) / 2.0 * 255.0
17 | return image_numpy_1.astype(imtype), image_numpy_2.astype(imtype)
18 |
19 | if image_numpy.shape[0] == 1:
20 | image_numpy = np.tile(image_numpy, (3, 1, 1))
21 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
22 | return image_numpy.astype(imtype)
23 |
24 |
25 | body_part_color = np.array([
26 | [0, 0, 0],
27 | [128, 0, 0],
28 | [0, 128, 0],
29 | [128, 128, 0],
30 | [0, 0, 128],
31 | [128, 0, 128],
32 | [0, 128, 128],
33 | [128, 128, 128],
34 | [64, 0, 0],
35 | [192, 0, 0],
36 | [64, 128, 0]],
37 | dtype=np.uint8)
38 |
39 |
40 | def postprocess_parsing(image_numpy, isTrain):
41 | imtype = image_numpy.dtype
42 | image_numpy = image_numpy[:, :, 0:1]
43 | image_numpy = np.tile(image_numpy, (1, 1, 7)).astype(int)
44 | standard = np.arange(7)
45 | standard *= 35
46 | standard = standard.reshape((1, 1, 7))
47 | standard = np.tile(standard, (image_numpy.shape[0], image_numpy.shape[1], 1))
48 | diff = np.abs(image_numpy - standard)
49 | min_index = np.argmin(diff, axis=2).reshape(((image_numpy.shape[0], image_numpy.shape[1], 1)))
50 | image_numpy = np.tile(min_index, (1, 1, 3))
51 | if isTrain:
52 | image_numpy = image_numpy * 35
53 | return image_numpy.astype(imtype)
54 |
55 |
56 | def paint_color(image_numpy):
57 | image_np = image_numpy[:, :, 0]
58 | if image_np.max() > 10:
59 | image_np = image_np / 35
60 | image_np = image_np.astype(np.uint8)
61 | return body_part_color[image_np]
62 |
63 |
64 | def diagnose_network(net, name='network'):
65 | mean = 0.0
66 | count = 0
67 | for param in net.parameters():
68 | if param.grad is not None:
69 | mean += torch.mean(torch.abs(param.grad.data))
70 | count += 1
71 | if count > 0:
72 | mean = mean / count
73 | print(name)
74 | print(mean)
75 |
76 |
77 | def save_image(image_numpy, image_path):
78 | image_pil = Image.fromarray(image_numpy)
79 | image_pil.save(image_path)
80 |
81 |
82 | def print_numpy(x, val=True, shp=False):
83 | x = x.astype(np.float64)
84 | if shp:
85 | print('shape,', x.shape)
86 | if val:
87 | x = x.flatten()
88 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
89 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
90 |
91 |
92 | def mkdirs(paths):
93 | if isinstance(paths, list) and not isinstance(paths, str):
94 | for path in paths:
95 | mkdir(path)
96 | else:
97 | mkdir(paths)
98 |
99 |
100 | def mkdir(path):
101 | if not os.path.exists(path):
102 | os.makedirs(path)
103 |
--------------------------------------------------------------------------------
/parsing_network/real-time-inference.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import tensorflow as tf
4 | from deeplab_resnet import DeepLabResNetModel, decode_labels, prepare_label
5 | import argparse
6 | import time
7 |
8 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
9 |
10 | NUM_CLASSES = 7
11 | IMG_SIZE = 512
12 |
13 | input_feed_shape = (1, IMG_SIZE, IMG_SIZE, 3)
14 |
15 | def get_arguments():
16 | """Parse all the arguments provided from the CLI.
17 |
18 | Returns:
19 | A list of parsed arguments.
20 | """
21 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference.")
22 | parser.add_argument("--model_weights", type=str, default='./final_model/',
23 | help="Path to the file with model weights.")
24 | parser.add_argument("--num_classes", type=int, default=NUM_CLASSES,
25 | help="Number of classes to predict (including background).")
26 | return parser.parse_args()
27 |
28 |
29 | def load(saver, sess, ckpt_path):
30 | '''Load trained weights.
31 |
32 | Args:
33 | saver: TensorFlow saver object.
34 | sess: TensorFlow session.
35 | ckpt_path: path to checkpoint file with parameters.
36 | '''
37 | saver.restore(sess, ckpt_path)
38 | print("Restored model parameters from {}".format(ckpt_path))
39 |
40 | args = get_arguments()
41 |
42 | img_tf = tf.placeholder(dtype=tf.float32, shape=input_feed_shape)
43 |
44 | net = DeepLabResNetModel({'data': img_tf}, is_training=False, num_classes=args.num_classes)
45 |
46 | restore_var = tf.global_variables()
47 |
48 | # Predictions.
49 | raw_output = net.layers['fc1_voc12']
50 | raw_output_up = tf.image.resize_bilinear(raw_output, [IMG_SIZE, IMG_SIZE])
51 | raw_output_up = tf.argmax(raw_output_up, dimension=3)
52 | pred = tf.expand_dims(raw_output_up, dim=3)
53 |
54 | config = tf.ConfigProto()
55 | config.gpu_options.allow_growth = True
56 | sess = tf.Session(config=config)
57 | init = tf.global_variables_initializer()
58 |
59 | sess.run(init)
60 |
61 | # Load weights.
62 | ckpt = tf.train.get_checkpoint_state(args.model_weights)
63 | loader = tf.train.Saver(var_list=restore_var)
64 | load(loader, sess, ckpt.model_checkpoint_path)
65 |
66 |
67 | def process_frame(frame):
68 | frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
69 | input_img_feed = np.array(frame, dtype=float)
70 | input_img_feed = np.expand_dims(input_img_feed, axis=0)
71 |
72 | start_time = time.time()
73 | preds = sess.run(pred, feed_dict={img_tf: input_img_feed})
74 | elapsed_time = time.time() - start_time
75 | print("FPS: ", 1 / elapsed_time)
76 | msk = decode_labels(preds, num_classes=NUM_CLASSES)
77 | im = msk[0]
78 | final = cv2.addWeighted(im,0.9,frame,0.7,0)
79 | return final
80 |
81 | def main():
82 |
83 | cap = cv2.VideoCapture(0)
84 |
85 | i = 1
86 | while(True):
87 | i += 1
88 | # Capture frame-by-frame
89 | ret, frame = cap.read()
90 |
91 | frame_out = process_frame(frame)
92 | #frame_out = cv2.resize(frame, (512,512))
93 | # Display the resulting frame
94 | cv2.imshow('frame',frame_out)
95 | if cv2.waitKey(1) & 0xFF == ord('q'):
96 | break
97 |
98 | # When everything done, release the capture
99 | cap.release()
100 | cv2.destroyAllWindows()
101 |
102 | if __name__ == '__main__':
103 | main()
104 |
--------------------------------------------------------------------------------
/data_generation/merge_parsing_result.py:
--------------------------------------------------------------------------------
1 | '''
2 | Merge parsing result of cropped poses together to be the label of the whole origin image.
3 |
4 | >>> python merge_parsing_result.py --outputDir /path/to/output --parsing_root /root_of_refinement_network/results/${experiment_name}/test_latest/images --origin_img_root /path/to/origin_img --json_file_root /path/to/pose_json_file --aug 0.25
5 | >>>
6 | '''
7 |
8 | import numpy as np
9 | import os
10 | import cv2
11 | import argparse
12 | import json
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument("--json_file_root", help="path to json file")
16 | parser.add_argument("--origin_img_root", help="path to origin img")
17 | parser.add_argument("--parsing_root", help="path to parsing results")
18 | parser.add_argument("--outputDir", help="where to put output files")
19 | parser.add_argument("--factor", type=int, default=1, help='multiply factor')
20 | parser.add_argument("--aug", type=float, default=0.25, help='augmentation factor for crop')
21 | opt = parser.parse_args()
22 |
23 | origin_img_root = opt.origin_img_root
24 | json_file_root = opt.json_file_root
25 | parsing_root = opt.parsing_root
26 |
27 | json_file = open(json_file_root, "r")
28 | json_string = json_file.readline()
29 | json_dict = json.loads(json_string)
30 |
31 | if not os.path.exists(opt.outputDir):
32 | os.makedirs(opt.outputDir)
33 |
34 | # coco to pascal keypoints order
35 | coco2pascal = [9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5, 7]
36 | # the 6th keypoint is missing in coco
37 |
38 | num_images = 0
39 | for k, v in json_dict.items():
40 | num_images += 1
41 | image_id = k
42 | origin_img = cv2.imread(os.path.join(origin_img_root, image_id))
43 | all_prior = np.zeros(origin_img.shape, dtype=np.uint8)
44 | bodies = v["bodies"]
45 | for i in range(len(bodies)):
46 | '''
47 | The following process of raw_pose and bbox should be the same as in crop_pose_and_generate_testing_prior.py
48 | '''
49 | body = bodies[i]
50 | keypoints = body["joints"]
51 | raw_pose = np.zeros((1, 32), dtype=float)
52 | min_x = keypoints[0]
53 | max_x = min_x
54 | min_y = keypoints[1]
55 | max_y = min_y
56 | for j in range(15):
57 | x = keypoints[3*j]
58 | y = keypoints[3*j+1]
59 | raw_pose[0][2*coco2pascal[j]] = x
60 | raw_pose[0][2*coco2pascal[j]+1] = y
61 | if x < min_x:
62 | min_x = x
63 | elif x > max_x:
64 | max_x = x
65 | if y < min_y:
66 | min_y = y
67 | elif y > max_y:
68 | max_y = y
69 | raw_pose[0][2*6] = (raw_pose[0][2*2] + raw_pose[0][2*3]) / 2
70 | raw_pose[0][2*6+1] = (raw_pose[0][2*2+1] + raw_pose[0][2*3+1]) / 2
71 | if max_x > origin_img.shape[1] or max_y > origin_img.shape[0]-1:
72 | print(max_x, max_y)
73 | print(image_id + " pose outside img")
74 |
75 | # deal with bbox
76 | bbox = [min_x, min_y, max_x, max_y]
77 | xaug = int((max_x - min_x + 1) * opt.aug)
78 | yaug = int((max_y - min_y + 1) * opt.aug)
79 | bbox[0] = max(bbox[0] - xaug, 0)
80 | bbox[1] = max(bbox[1] - yaug, 0)
81 | bbox[2] = min(bbox[2] + xaug, origin_img.shape[1]-1)
82 | bbox[3] = min(bbox[3] + yaug, origin_img.shape[0]-1)
83 | print('bbox', bbox)
84 |
85 | prior = cv2.imread(os.path.join(parsing_root, image_id.split('.')[0] + '_' + str(i) + '_fake_B_postprocessed.png'))
86 | prior = cv2.resize(prior, (bbox[2]+1-bbox[0], bbox[3]+1-bbox[1]), interpolation=cv2.INTER_NEAREST)
87 | all_prior[bbox[1]:bbox[3]+1, bbox[0]:bbox[2]+1] = np.maximum(all_prior[bbox[1]:bbox[3]+1, bbox[0]:bbox[2]+1], prior)
88 |
89 | print(image_id, i, num_images)
90 | # all_prior = all_prior + (all_prior == 0) * 255
91 | cv2.imwrite(os.path.join(opt.outputDir, image_id[:len(image_id)-3]+'png'), all_prior[:, :, 0])
92 |
93 | print('finished')
94 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/data/aligned_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 | import torchvision.transforms as transforms
4 | import torch
5 | from data.base_dataset import BaseDataset
6 | from data.image_folder import make_dataset
7 | from PIL import Image
8 | import ntpath
9 | import numpy as np
10 |
11 |
12 | class AlignedDataset(BaseDataset):
13 | def initialize(self, opt):
14 | self.opt = opt
15 | self.root = opt.dataroot
16 | self.dir_AB = os.path.join(opt.dataroot)
17 | self.dir_img = os.path.join(self.dir_AB, 'img')
18 | self.dir_priors = os.path.join(self.dir_AB, 'prior')
19 | self.dir_parsing = os.path.join(self.dir_AB, 'parsing')
20 |
21 | self.img_paths = sorted(make_dataset(self.dir_img))
22 | self.parsing_paths = sorted(make_dataset(self.dir_parsing))
23 | assert(opt.resize_or_crop == 'resize_and_crop')
24 |
25 | def __getitem__(self, index):
26 | # img
27 | img_path = self.img_paths[index % self.__len__()]
28 | short_path = ntpath.basename(img_path)
29 | img_name = os.path.splitext(short_path)[0]
30 | img = Image.open(img_path).convert('RGB')
31 | img = img.resize((self.opt.loadSize, self.opt.loadSize), Image.BICUBIC)
32 | img = transforms.ToTensor()(img)
33 |
34 | # prior
35 | prior_indexes = np.arange(self.opt.n)
36 | if self.opt.shuffle:
37 | random.shuffle(prior_indexes)
38 | prior = torch.zeros(3, self.opt.loadSize, self.opt.loadSize)
39 | for i in range(self.opt.k):
40 | morphed_prior_path = os.path.join(self.dir_priors, img_name, str(prior_indexes[i]) + '.png')
41 | morphed_prior = Image.open(morphed_prior_path).convert('RGB')
42 | morphed_prior = morphed_prior.resize((self.opt.loadSize, self.opt.loadSize), Image.BICUBIC)
43 | morphed_prior = transforms.ToTensor()(morphed_prior)
44 | prior += morphed_prior
45 | prior /= self.opt.k
46 |
47 | # parsing
48 | parsing_path = self.parsing_paths[index % self.__len__()]
49 | parsing = Image.open(parsing_path).convert('RGB')
50 | parsing = parsing.resize((self.opt.loadSize, self.opt.loadSize), Image.NEAREST)
51 | parsing = transforms.ToTensor()(parsing)
52 |
53 | w = img.size(2)
54 | h = img.size(1)
55 | w_offset = random.randint(0, max(0, w - self.opt.fineSize - 1))
56 | h_offset = random.randint(0, max(0, h - self.opt.fineSize - 1))
57 |
58 | img = img[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize]
59 | prior = prior[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize]
60 | parsing = parsing[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize]
61 |
62 | img = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(img)
63 | prior = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(prior)
64 | parsing = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(parsing)
65 |
66 | if self.opt.which_direction == 'BtoA':
67 | input_nc = self.opt.output_nc
68 | output_nc = self.opt.input_nc
69 | else:
70 | input_nc = self.opt.input_nc
71 | output_nc = self.opt.output_nc
72 |
73 | if (not self.opt.no_flip) and random.random() < 0.5:
74 | idx = [i for i in range(img.size(2) - 1, -1, -1)]
75 | idx = torch.LongTensor(idx)
76 | img = img.index_select(2, idx)
77 | prior = prior.index_select(2, idx)
78 | parsing = parsing.index_select(2, idx)
79 |
80 | A = torch.cat([img, prior], dim=0)
81 | B = parsing
82 | if output_nc == 1: # RGB to gray
83 | tmp = B[0, ...] * 0.299 + B[1, ...] * 0.587 + B[2, ...] * 0.114
84 | B = tmp.unsqueeze(0)
85 |
86 | return {'A': A, 'B': B,
87 | 'A_paths': img_path, 'B_paths': parsing_path}
88 |
89 | def __len__(self):
90 | return min(len(self.img_paths), len(self.parsing_paths))
91 |
92 | def name(self):
93 | return 'AlignedDataset'
94 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/README.md:
--------------------------------------------------------------------------------
1 | # Refinement Network
2 |
3 | This is the code for the Refinement Network. We use the Refinement Network to generate parsing labels for single persons.
4 |
5 | ## Prerequisites
6 | - Linux or macOS
7 | - Python 3
8 | - CPU or NVIDIA GPU + CUDA CuDNN
9 |
10 | ## Getting Started
11 | ### Installation
12 | - Install PyTorch 0.3.0 and dependencies from http://pytorch.org
13 | - Install torchvision from source.
14 | ```bash
15 | git clone https://github.com/pytorch/vision
16 | cd vision
17 | python3 setup.py install
18 | ```
19 |
20 | - Install python libraries [visdom](https://github.com/facebookresearch/visdom) and [dominate](https://github.com/Knio/dominate).
21 | ```bash
22 | pip3 install visdom
23 | pip3 install dominate
24 | ```
25 |
26 | - Clone this repo:
27 | ```bash
28 | git clone https://github.com/MVIG-SJTU/WSHP
29 | cd WSHP/data_generation/refinement_network
30 | ```
31 |
32 | ### Train/Test
33 | #### Train
34 | - Prepare a training dataset, which should have the following directories:
35 | ```
36 | /dataroot
37 | /img
38 | img1.ext
39 | img2.ext
40 | ...
41 | /parsing
42 | img1.ext
43 | img2.ext
44 | ...
45 | /prior
46 | /img1
47 | 0.png
48 | 1.png
49 | ...
50 | /img2
51 | 0.png
52 | 1.png
53 | ...
54 | ...
55 | ```
56 | In our project, we use the `Pascal` dataset to train our Refinement Network. Besides the original images and parsing labels, training the refinement network also requires prior images; the `datasets` directory contains some code showing how to generate them, and the sketch below shows how the data loader combines them. You can use any dataset that has both kinds of annotations. [Here](https://drive.google.com/open?id=1Ck8_1m74aLGDhawIbsdYsCee9_sFiBcE) is some data you can use.
57 |
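For each training image, the aligned dataset (`data/aligned_dataset.py`) picks `k` of the `n` prior maps (optionally shuffled with `--shuffle`) and averages them into a single prior. A minimal sketch of that step, with the paths, function name, and sizes as placeholders:

```python
import os
import random

import torch
import torchvision.transforms as transforms
from PIL import Image


def load_averaged_prior(prior_dir, img_name, load_size=286, n=5, k=3, shuffle=True):
    """Average k of the n priors stored as <prior_dir>/<img_name>/0.png ... <n-1>.png."""
    indexes = list(range(n))
    if shuffle:
        random.shuffle(indexes)
    prior = torch.zeros(3, load_size, load_size)
    for i in indexes[:k]:
        p = Image.open(os.path.join(prior_dir, img_name, '%d.png' % i)).convert('RGB')
        p = p.resize((load_size, load_size), Image.BICUBIC)
        prior += transforms.ToTensor()(p)
    return prior / k
```

This mirrors the `--n 5 --k 3` flags used in the training command below.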
58 | - Train a model:
59 | ```bash
60 | #!./scripts/train.sh
61 | python3 train.py --dataroot /path/to/dataset --dataset_mode aligned --model pix2pix --no_gan --shuffle --n 5 --k 3 --output_nc 1 --name exp1
62 | ```
63 |
64 | - To view training results and loss plots, run `python3 -m visdom.server` and click the URL http://localhost:8097. To see more intermediate results, check out `./checkpoints/exp1/web/index.html`
65 |
66 | #### Test
67 | - Prepare a testing dataset, which should have the following directories:
68 | ```
69 | /dataroot
70 | /img
71 | img1.ext
72 | img2.ext
73 | ...
74 | /prior
75 | img1.ext
76 | img2.ext
77 | ...
78 | ```
79 | For more details, please refer to the [parent module](https://github.com/Fang-Haoshu/WSHP/data_generation), where we discuss how to generate priors for a dataset that only has keypoint annotations.
80 |
81 | - Test the model:
82 | ```bash
83 | #!./scripts/test.sh
84 | python3 test.py --dataroot /path/to/dataset --dataset_mode single --model test --output_nc 1 --name exp1
85 | ```
86 | The test results will be saved to an HTML file here: `./results/exp1/test_latest/index.html`.
87 |
88 | ## Training/Test Details
89 | - Flags: see `options/train_options.py` and `options/base_options.py` for all the training flags; see `options/test_options.py` and `options/base_options.py` for all the test flags.
90 | - CPU/GPU (default `--gpu_ids 0`): set `--gpu_ids -1` to use CPU mode; set `--gpu_ids 0,1,2` for multi-GPU mode. You need a large batch size (e.g. `--batchSize 32`) to benefit from multiple GPUs.
91 | - Visualization: during training, the current results can be viewed using two methods. First, if you set `--display_id` > 0, the results and loss plot will appear on a local graphics web server launched by [visdom](https://github.com/facebookresearch/visdom). To do this, you should have `visdom` installed and a server running by the command `python3 -m visdom.server`. The default server URL is `http://localhost:8097`. `display_id` corresponds to the window ID that is displayed on the `visdom` server. The `visdom` display functionality is turned on by default. To avoid the extra overhead of communicating with `visdom` set `--display_id 0`. Second, the intermediate results are saved to `[opt.checkpoints_dir]/[opt.name]/web/` as an HTML file. To avoid this, set `--no_html`.
92 | - Preprocessing: images can be resized and cropped in different ways using the `--resize_or_crop` option. The default option `'resize_and_crop'` resizes the image to be of size `(opt.loadSize, opt.loadSize)` and does a random crop of size `(opt.fineSize, opt.fineSize)`. `'crop'` skips the resizing step and only performs random cropping. `'scale_width'` resizes the image to have width `opt.fineSize` while keeping the aspect ratio. `'scale_width_and_crop'` first resizes the image to have width `opt.loadSize` and then does random cropping of size `(opt.fineSize, opt.fineSize)`. A minimal sketch of the default behaviour is shown below.
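The following is a rough illustration (not the repository's exact implementation) of what the default `'resize_and_crop'` option does, assuming PIL images and the default `loadSize`/`fineSize` values:

```python
import random
from PIL import Image


def resize_and_crop(img, load_size=286, fine_size=256):
    """Resize to (load_size, load_size), then take a random fine_size x fine_size crop."""
    img = img.resize((load_size, load_size), Image.BICUBIC)
    x = random.randint(0, max(0, load_size - fine_size - 1))
    y = random.randint(0, max(0, load_size - fine_size - 1))
    return img.crop((x, y, x + fine_size, y + fine_size))
```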
93 |
94 | ## Acknowledgments
95 | Code is inspired by [pytorch-CycleGAN-and-pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix).
96 |
--------------------------------------------------------------------------------
/parsing_network/inference.py:
--------------------------------------------------------------------------------
1 | """Run DeepLab-ResNet on a given image.
2 |
3 | This script computes a segmentation mask for a given image.
4 | """
5 |
6 | from __future__ import print_function
7 |
8 | import argparse
9 | from datetime import datetime
10 | import os
11 | import sys
12 | import time
13 |
14 | from PIL import Image
15 |
16 | import tensorflow as tf
17 | import numpy as np
18 |
19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, prepare_label
20 |
21 | import pdb
22 |
23 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
24 |
25 | NUM_CLASSES = 7
26 | DATA_LIST = './dataset/dance.txt'
27 | SAVE_DIR = './output/'
28 |
29 | def get_arguments():
30 | """Parse all the arguments provided from the CLI.
31 |
32 | Returns:
33 | A list of parsed arguments.
34 | """
35 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference.")
36 | parser.add_argument("img_path", type=str,
37 | help="Path to the RGB image file folder.")
38 | parser.add_argument("model_weights", type=str,
39 | help="Path to the file with model weights.")
40 | parser.add_argument("--data_list", type=str, default=DATA_LIST,
41 | help="Path to the image list.")
42 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
43 | help="Number of classes to predict (including background).")
44 | parser.add_argument("--save-dir", type=str, default=SAVE_DIR,
45 | help="Where to save predicted mask.")
46 | return parser.parse_args()
47 |
48 | def load(saver, sess, ckpt_path):
49 | '''Load trained weights.
50 |
51 | Args:
52 | saver: TensorFlow saver object.
53 | sess: TensorFlow session.
54 | ckpt_path: path to checkpoint file with parameters.
55 | '''
56 | saver.restore(sess, ckpt_path)
57 | print("Restored model parameters from {}".format(ckpt_path))
58 |
59 | def file_len(fname):
60 | with open(fname) as f:
61 | for i, l in enumerate(f):
62 | pass
63 | return i + 1
64 |
65 | def main():
66 | """Create the model and start the evaluation process."""
67 | args = get_arguments()
68 | num_steps = file_len(args.data_list)
69 | # Create queue coordinator.
70 | coord = tf.train.Coordinator()
71 |
72 | # Load reader.
73 | with tf.name_scope("create_inputs"):
74 | reader = ImageReader(
75 | args.img_path,
76 | args.data_list,
77 | None, # No defined input size.
78 | False, # No random scale.
79 | False, # No random mirror.
80 | 255,
81 | IMG_MEAN,
82 | coord)
83 | image, label = reader.image, reader.label
84 | title = reader.queue[0]
85 | image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension.
86 |
87 | # Create network.
88 | net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes)
89 |
90 | # Which variables to load.
91 | restore_var = tf.global_variables()
92 |
93 | # Predictions.
94 | raw_output = net.layers['fc1_voc12']
95 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
96 | raw_output_up = tf.argmax(raw_output_up, dimension=3)
97 | pred = tf.expand_dims(raw_output_up, dim=3)
98 |
99 |
100 | # Set up TF session and initialize variables.
101 | config = tf.ConfigProto()
102 | config.gpu_options.allow_growth = True
103 | sess = tf.Session(config=config)
104 | init = tf.global_variables_initializer()
105 |
106 | sess.run(init)
107 |
108 | # Load weights.
109 | loader = tf.train.Saver(var_list=restore_var)
110 | load(loader, sess, args.model_weights)
111 |
112 | # Start queue threads.
113 | threads = tf.train.start_queue_runners(coord=coord, sess=sess)
114 |
115 | start_time = time.time()
116 | if not os.path.exists(args.save_dir):
117 | os.makedirs(args.save_dir)
118 | # Perform inference.
119 | for step in range(num_steps):
120 | preds, jpg_path = sess.run([pred, title])
121 | msk = decode_labels(preds, num_classes=args.num_classes)
122 | im = Image.fromarray(msk[0])
123 | img_o = Image.open(jpg_path)
124 | jpg_path = jpg_path.split('/')[-1].split('.')[0]
125 | img = np.array(im)*0.9 + np.array(img_o)*0.7
126 | img[img>255] = 255
127 | img = Image.fromarray(np.uint8(img))
128 | img.save(args.save_dir + jpg_path + '.png')
129 | print('Image processed {}.png'.format(jpg_path))
130 |
131 | total_time = time.time() - start_time
132 | print('The output files have been saved to {}'.format(args.save_dir))
133 | print('It took {} sec on each image.'.format(total_time/num_steps))
134 |
135 | if __name__ == '__main__':
136 | main()
137 |
--------------------------------------------------------------------------------
/data_generation/README.md:
--------------------------------------------------------------------------------
1 | # Generate Parsing Label
2 | This is the code for generating parsing labels for the semi-supervised training of the Parsing Network.
3 |
4 | ## Prerequisites
5 | - Linux or macOS
6 | - Python 2 and 3
7 |
8 | ## Getting Started
9 | ### Installation
10 | - Install any missing Python libraries, including opencv-python, numpy, etc.
11 |
12 | - Install PyTorch 0.3.0 and dependencies from http://pytorch.org in Python 3.
13 | - Install Torch vision from the source in Python 3.
14 | ```bash
15 | git clone https://github.com/pytorch/vision
16 | cd vision
17 | python3 setup.py install
18 | ```
19 |
20 | - Install python libraries [visdom](https://github.com/facebookresearch/visdom) and [dominate](https://github.com/Knio/dominate) in Python3.
21 | ```bash
22 | pip3 install visdom
23 | pip3 install dominate
24 | ```
25 |
26 | - Clone this repo:
27 | ```bash
28 | git clone https://github.com/MVIG-SJTU/WSHP
29 | cd WSHP/data_generation
30 | ```
31 | - Download the [demo data](https://drive.google.com/open?id=1N6yYgrulPHqsCRACdbX7MpAWFnYNaAYm) and extract it to the `examples` directory.
32 |
33 | ### Demo
34 | - For the demo, just run the following bash command:
35 | ```
36 | bash demo.sh
37 | ```
38 |
39 | - After it finishes, the directories `crop_output`, `merge_output`, and `overlay_output` under `examples/outputs` contain the cropped original images and priors of single persons, the complete parsing labels of the whole original images, and the original images overlaid with their parsing labels, respectively.
40 |
41 | Origin images:
42 |
43 |
44 |
45 |
46 | Parsing labels:
47 |
48 |
49 |
50 |
51 | ### Run
52 | 1. Prepare keypoint annotations. For a dataset without keypoint annotations, one can use a keypoint detection network to detect keypoints. We use [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) by running `./run.sh --indir examples/demo/ --outdir examples/results/ --dataset MPII`. **Note that our algorithm only applies to images with whole-body persons; use `pick_full_person.py` to select such images**. Under the folder `examples`, the file `examples.json` is an example output, and our code reads keypoint information from a JSON file with the same format (a minimal sketch of that format is shown below).
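As a rough sketch of that format (the file name is a placeholder; `pick_full_person.py` writes `full-person.json` in this layout), each image name maps to a dict with a `bodies` list, and each body holds a flat `joints` array of 15 `(x, y, score)` triples:

```python
import json

# Hypothetical file name; the demo uses examples/examples.json with the same layout.
with open('full-person.json') as f:
    poses = json.load(f)

for img_name, entry in poses.items():
    for body in entry['bodies']:
        joints = body['joints']  # flat list: x0, y0, score0, x1, y1, score1, ...
        triples = [joints[3 * j:3 * j + 3] for j in range(len(joints) // 3)]
        print(img_name, len(entry['bodies']), 'people;', len(triples), 'keypoints in this body')
```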
53 |
54 | 2. Crop and generate priors. The Python script `crop_pose_and_generate_testing_prior.py` shows how to crop out each single person from the original images and generate the corresponding prior, given keypoint information (specified by a JSON file).
55 | ```
56 | python crop_pose_and_generate_testing_prior.py --PASCALPoseFileRoot /path/to/pascal_pose_file.csv --PASCALMaskImgDir /path/to/pascal_mask_img --n 3 --k 3 --aug 0.25 --origin_img_root /path/to/origin_img --json_file_root /path/to/pose_json_file --outputDir /path/to/output
57 | ```
58 |
59 | 3. Generate a parsing label for each single person. In this step we use the test mode of a pretrained refinement network model to generate a label for each cropped single-person image. For this part, please refer to the submodule `refinement_network`.
60 |
61 | 4. Merge the results to get the complete parsing label. The Python script `merge_parsing_result.py` shows how to merge the labels of the different people from the same image into the complete parsing label for each original image.
62 | ```
63 | python merge_parsing_result.py --outputDir /path/to/output --parsing_root /root_of_refinement_network/results/${experiment_name}/test_latest/images --origin_img_root /path/to/origin_img --json_file_root /path/to/pose_json_file --aug 0.25
64 | ```
65 |
66 | 5. Overlay the original image with its corresponding color parsing label to check the results (a sketch of the blending is shown after the command).
67 | ```
68 | python overlay.py --origin_img_root /path/to/origin_img --prior_img_root /path/to/merged_parsing_label --outputDir /path/to/output
69 | ```
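The authoritative blend lives in `overlay.py`; as a rough sketch with placeholder paths and file names, `parsing_network/inference.py` produces its overlays with essentially this weighted sum of the color label and the original image:

```python
import cv2
import numpy as np

origin = cv2.imread('/path/to/origin_img/example.jpg')                # placeholder paths
label_color = cv2.imread('/path/to/merged_parsing_label_color.png')
label_color = cv2.resize(label_color, (origin.shape[1], origin.shape[0]),
                         interpolation=cv2.INTER_NEAREST)

# Same weights as in parsing_network/inference.py: 0.9 * label + 0.7 * image, clipped to 255.
overlay = np.clip(0.9 * label_color.astype(np.float32) + 0.7 * origin.astype(np.float32),
                  0, 255).astype(np.uint8)
cv2.imwrite('overlay_example.png', overlay)
```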
70 |
71 | - The bash script `pose2label.sh` gathers steps 2, 3, 4 and 5 together; you can use it to get parsing labels conveniently without waiting for each step to finish before starting the next.
72 | ```
73 | bash pose2label.sh
74 | ```
75 | You have to specify the following parameters at the beginning of the script:
76 | ```
77 | pascal_pose_file_root="/path/to/pascal_pose_file.csv"
78 | pascal_mask_img_dir="/path/to/pascal_mask_img"
79 | origin_img_root="/path/to/origin_img"
80 | json_file_root="/path/to/pose_json_file"
81 | crop_output_path="/path/to/output/cropped_img_and_prior"
82 | experiment_name="exp1"
83 | merge_output_path="/path/to/output/merged_parsing_label"
84 | overlay_output_path="/path/to/output/overlayed_image"
85 | ```
86 |
87 | - One thing that should be noted is the format of the keypoints.
88 | The order we use for Pascal images is as follows:
89 | ```
90 | 0-'right ankle' 1-'right knee' 2-'right hip' 3-'left hip' 4-'left knee' 5-'left ankle' 6-'pelvis' 7-'thorax' 8-'neck' 9-'head' 10-'right wrist' 11-'right elbow' 12-'right shoulder' 13-'left shoulder' 14-'left elbow' 15-'left wrist'.
91 | ```
92 | Actually, `thorax` is unused and set to `(0, 0)`, and when `pelvis` is missing, we use the midpoint of the two hips instead.
93 | The format of the output of [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) is different, and we adjust it in our code so that it can be compared with the poses of Pascal images (see the sketch below).
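A minimal sketch of that adjustment, mirroring the remapping done in `crop_pose_and_generate_testing_prior.py` and `merge_parsing_result.py` (the function name is a placeholder): the JSON's 15 joints are placed into the Pascal slots via the index map, and `pelvis` is filled with the midpoint of the two hips.

```python
import numpy as np

# The j-th (x, y, score) triple from the JSON goes into Pascal slot alphapose2pascal[j].
alphapose2pascal = [9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5, 7]


def to_pascal_pose(joints):
    """joints: flat list of 15 (x, y, score) triples; returns a (1, 32) pose in Pascal keypoint order."""
    pose = np.zeros((1, 32), dtype=float)
    for j in range(15):
        x, y = joints[3 * j], joints[3 * j + 1]
        pose[0][2 * alphapose2pascal[j]] = x
        pose[0][2 * alphapose2pascal[j] + 1] = y
    # pelvis (slot 6) = midpoint of right hip (slot 2) and left hip (slot 3)
    pose[0][2 * 6] = (pose[0][2 * 2] + pose[0][2 * 3]) / 2
    pose[0][2 * 6 + 1] = (pose[0][2 * 2 + 1] + pose[0][2 * 3 + 1]) / 2
    return pose
```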
94 |
95 |
--------------------------------------------------------------------------------
/parsing_network/evaluate.py:
--------------------------------------------------------------------------------
1 | """Evaluation script for the DeepLab-ResNet network on the validation subset
2 | of PASCAL VOC dataset.
3 |
4 | This script evaluates the model on 1449 validation images.
5 | """
6 |
7 | from __future__ import print_function
8 |
9 | import argparse
10 | from datetime import datetime
11 | import os
12 | import sys
13 | import time
14 |
15 | import tensorflow as tf
16 | import numpy as np
17 |
18 | from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label
19 |
20 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
21 |
22 | DATA_DIRECTORY = '/home/VOCdevkit'
23 | DATA_LIST_PATH = './dataset/pascal_test.txt'
24 | IGNORE_LABEL = 255
25 | NUM_CLASSES = 7
26 | NUM_STEPS = 1831 # Number of images in the validation set.
27 | RESTORE_FROM = './deeplab_resnet.ckpt'
28 |
29 | def get_arguments():
30 | """Parse all the arguments provided from the CLI.
31 |
32 | Returns:
33 | A list of parsed arguments.
34 | """
35 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network")
36 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY,
37 | help="Path to the directory containing the PASCAL VOC dataset.")
38 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH,
39 | help="Path to the file listing the images in the dataset.")
40 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL,
41 | help="The index of the label to ignore during the training.")
42 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
43 | help="Number of classes to predict (including background).")
44 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS,
45 | help="Number of images in the validation set.")
46 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM,
47 | help="Where restore model parameters from.")
48 | return parser.parse_args()
49 |
50 | def load(saver, sess, ckpt_path):
51 | '''Load trained weights.
52 |
53 | Args:
54 | saver: TensorFlow saver object.
55 | sess: TensorFlow session.
56 | ckpt_path: path to checkpoint file with parameters.
57 | '''
58 | saver.restore(sess, ckpt_path)
59 | print("Restored model parameters from {}".format(ckpt_path))
60 |
61 | def main():
62 | """Create the model and start the evaluation process."""
63 | args = get_arguments()
64 |
65 | # Create queue coordinator.
66 | coord = tf.train.Coordinator()
67 |
68 | # Load reader.
69 | with tf.name_scope("create_inputs"):
70 | reader = ImageReader(
71 | args.data_dir,
72 | args.data_list,
73 | None, # No defined input size.
74 | False, # No random scale.
75 | False, # No random mirror.
76 | args.ignore_label,
77 | IMG_MEAN,
78 | coord)
79 | image, label = reader.image, reader.label
80 | image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension.
81 |
82 | # Create network.
83 | net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes)
84 |
85 | # Which variables to load.
86 | restore_var = tf.global_variables()
87 |
88 | # Predictions.
89 | raw_output = net.layers['fc1_voc12']
90 | raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
91 | raw_output = tf.argmax(raw_output, dimension=3)
92 | pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor.
93 |
94 | # mIoU
95 | pred = tf.reshape(pred, [-1,])
96 | gt = tf.reshape(label_batch, [-1,])
97 | weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes.
98 | mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights)
99 |
100 | # Set up tf session and initialize variables.
101 | config = tf.ConfigProto()
102 | config.gpu_options.allow_growth = True
103 | sess = tf.Session(config=config)
104 | init = tf.global_variables_initializer()
105 |
106 | sess.run(init)
107 | sess.run(tf.local_variables_initializer())
108 |
109 | # Load weights.
110 | loader = tf.train.Saver(var_list=restore_var)
111 | if args.restore_from is not None:
112 | load(loader, sess, args.restore_from)
113 |
114 | # Start queue threads.
115 | threads = tf.train.start_queue_runners(coord=coord, sess=sess)
116 |
117 | # Iterate over training steps.
118 | for step in range(args.num_steps):
119 | preds, _ = sess.run([pred, update_op])
120 | if step % 100 == 0:
121 | print('step {:d}'.format(step))
122 | print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
123 | coord.request_stop()
124 | coord.join(threads)
125 |
126 | if __name__ == '__main__':
127 | main()
128 |
--------------------------------------------------------------------------------
/data_generation/crop_pose_and_generate_testing_prior.py:
--------------------------------------------------------------------------------
1 | '''
2 | Given a json file containing poses for images(one image may have more than one pose corresponding to different people),
3 | crop out each people and generate corresponding prior.
4 | Then we can use the test mode of the pre-trained refinement model to generate parsing result for each cropped pose.
5 |
6 | >>> python crop_pose_and_generate_testing_prior.py --PASCALPoseFileRoot /path/to/pascal_pose_file.csv --PASCALMaskImgDir /path/to/pascal_mask_img --n 3 --k 3 --aug 0.25 --origin_img_root /path/to/origin_img --json_file_root /path/to/pose_json_file --outputDir /path/to/output
7 | >>>
8 | '''
9 |
10 | import argparse
11 | import json
12 | from generate_prior_util import *
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument("--PASCALPoseFileRoot", help="path to PASCAL pose file")
16 | parser.add_argument("--PASCALMaskImgDir", help="path to PASCAL mask images")
17 | parser.add_argument("--origin_img_root", help="path to origin img")
18 | parser.add_argument("--json_file_root", help="path to json file")
19 | parser.add_argument("--outputDir", help="where to put output files")
20 | parser.add_argument("--draw_skeleton", action="store_true", help="draw skeleton to check the format of keypoints")
21 | parser.add_argument("--n", type=int, default=5, help="number of close images picked first time")
22 | parser.add_argument("--k", type=int, default=3, help="number of close images picked for prior generation in n picked images")
23 | parser.add_argument("--aug", type=float, default=0.25, help='augmentation factor for crop')
24 | opt = parser.parse_args()
25 |
26 | json_file_root = opt.json_file_root
27 | origin_img_root = opt.origin_img_root
28 | json_file = open(json_file_root, "r")
29 | json_string = json_file.readline()
30 | json_dict = json.loads(json_string)
31 | print('length of json_dict', len(json_dict))
32 |
33 | pascal_poses, pascal_img_names, pascal_pose_dict = load_pascal_pose(opt.PASCALPoseFileRoot)
34 | print('length of pascal_img', len(pascal_img_names))
35 |
36 | if not os.path.exists(opt.outputDir):
37 | os.makedirs(opt.outputDir)
38 | img_dir = os.path.join(opt.outputDir, 'img')
39 | prior_dir = os.path.join(opt.outputDir, 'prior')
40 | if not os.path.exists(img_dir):
41 | os.makedirs(img_dir)
42 | if not os.path.exists(prior_dir):
43 | os.makedirs(prior_dir)
44 |
45 | if opt.draw_skeleton:
46 | skeleton_dir = os.path.join(opt.outputDir, 'skeleton')
47 | if not os.path.exists(skeleton_dir):
48 | os.makedirs(skeleton_dir)
49 |
50 | # alphapose to pascal keypoints order
51 | alphapose2pascal = [9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5, 7]
52 | # the 6th keypoint is missing
53 |
54 | num_images = 0
55 | for k, v in json_dict.items():
56 | num_images += 1
57 | image_id = k
58 | origin_img = cv2.imread(os.path.join(origin_img_root, image_id))
59 | bodies = v["bodies"]
60 | for i in range(len(bodies)):
61 | body = bodies[i]
62 | keypoints = body["joints"]
63 | raw_pose = np.zeros((1, 32), dtype=float)
64 | min_x = keypoints[0]
65 | max_x = min_x
66 | min_y = keypoints[1]
67 | max_y = min_y
68 | for j in range(15):
69 | x = keypoints[3*j]
70 | y = keypoints[3*j+1]
71 | raw_pose[0][2*alphapose2pascal[j]] = x
72 | raw_pose[0][2*alphapose2pascal[j]+1] = y
73 | if x < min_x:
74 | min_x = x
75 | elif x > max_x:
76 | max_x = x
77 | if y < min_y:
78 | min_y = y
79 | elif y > max_y:
80 | max_y = y
81 | raw_pose[0][2*6] = (raw_pose[0][2*2] + raw_pose[0][2*3]) / 2
82 | raw_pose[0][2*6+1] = (raw_pose[0][2*2+1] + raw_pose[0][2*3+1]) / 2
83 | if max_x > origin_img.shape[1] or max_y > origin_img.shape[0]-1:
84 | print(max_x, max_y)
85 | print(image_id + " pose outside img")
86 |
87 | # deal with bbox
88 | bbox = [min_x, min_y, max_x, max_y]
89 | xaug = int((max_x - min_x + 1) * opt.aug)
90 | yaug = int((max_y - min_y + 1) * opt.aug)
91 | bbox[0] = max(bbox[0] - xaug, 0)
92 | bbox[1] = max(bbox[1] - yaug, 0)
93 | bbox[2] = min(bbox[2] + xaug, origin_img.shape[1]-1)
94 | bbox[3] = min(bbox[3] + yaug, origin_img.shape[0]-1)
95 | print('bbox', bbox)
96 |
97 | prior = generate_prior_single_person(bbox, raw_pose, opt.PASCALMaskImgDir, pascal_poses, pascal_img_names, pascal_pose_dict, opt.n, opt.k)
98 | prior = prior[:, :, [2, 1, 0]]
99 | img = origin_img[bbox[1]:bbox[3]+1, bbox[0]:bbox[2]+1]
100 | if opt.draw_skeleton:
101 | skeleton_img = drawSkeleton(origin_img, raw_pose)
102 | cv2.imwrite(os.path.join(skeleton_dir, image_id.split('.')[0]+'_'+str(i)+'.jpg'), skeleton_img)
103 | cv2.imwrite(os.path.join(img_dir, image_id.split('.')[0]+'_'+str(i)+'.jpg'), img)
104 | cv2.imwrite(os.path.join(prior_dir, image_id.split('.')[0]+'_'+str(i)+'.jpg'), prior)
105 |
106 | print(image_id, i, num_images)
107 |
108 | print('finished')
109 |
--------------------------------------------------------------------------------
/parsing_network/kaffe/layers.py:
--------------------------------------------------------------------------------
1 | import re
2 | import numbers
3 | from collections import namedtuple
4 |
5 | from .shapes import *
6 |
7 | LAYER_DESCRIPTORS = {
8 |
9 | # Caffe Types
10 | 'AbsVal': shape_identity,
11 | 'Accuracy': shape_scalar,
12 | 'ArgMax': shape_not_implemented,
13 | 'BatchNorm': shape_identity,
14 | 'BNLL': shape_not_implemented,
15 | 'Concat': shape_concat,
16 | 'ContrastiveLoss': shape_scalar,
17 | 'Convolution': shape_convolution,
18 | 'Deconvolution': shape_not_implemented,
19 | 'Data': shape_data,
20 | 'Dropout': shape_identity,
21 | 'DummyData': shape_data,
22 | 'EuclideanLoss': shape_scalar,
23 | 'Eltwise': shape_identity,
24 | 'Exp': shape_identity,
25 | 'Flatten': shape_not_implemented,
26 | 'HDF5Data': shape_data,
27 | 'HDF5Output': shape_identity,
28 | 'HingeLoss': shape_scalar,
29 | 'Im2col': shape_not_implemented,
30 | 'ImageData': shape_data,
31 | 'InfogainLoss': shape_scalar,
32 | 'InnerProduct': shape_inner_product,
33 | 'Input': shape_data,
34 | 'LRN': shape_identity,
35 | 'MemoryData': shape_mem_data,
36 | 'MultinomialLogisticLoss': shape_scalar,
37 | 'MVN': shape_not_implemented,
38 | 'Pooling': shape_pool,
39 | 'Power': shape_identity,
40 | 'ReLU': shape_identity,
41 | 'Scale': shape_identity,
42 | 'Sigmoid': shape_identity,
43 | 'SigmoidCrossEntropyLoss': shape_scalar,
44 | 'Silence': shape_not_implemented,
45 | 'Softmax': shape_identity,
46 | 'SoftmaxWithLoss': shape_scalar,
47 | 'Split': shape_not_implemented,
48 | 'Slice': shape_not_implemented,
49 | 'TanH': shape_identity,
50 | 'WindowData': shape_not_implemented,
51 | 'Threshold': shape_identity,
52 | }
53 |
54 | LAYER_TYPES = LAYER_DESCRIPTORS.keys()
55 |
56 | LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES})
57 |
58 | class NodeKind(LayerType):
59 |
60 | @staticmethod
61 | def map_raw_kind(kind):
62 | if kind in LAYER_TYPES:
63 | return kind
64 | return None
65 |
66 | @staticmethod
67 | def compute_output_shape(node):
68 | try:
69 | val = LAYER_DESCRIPTORS[node.kind](node)
70 | return val
71 | except NotImplementedError:
72 | raise KaffeError('Output shape computation not implemented for type: %s' % node.kind)
73 |
74 |
75 | class NodeDispatchError(KaffeError):
76 |
77 | pass
78 |
79 |
80 | class NodeDispatch(object):
81 |
82 | @staticmethod
83 | def get_handler_name(node_kind):
84 | if len(node_kind) <= 4:
85 | # A catch-all for things like ReLU and tanh
86 | return node_kind.lower()
87 | # Convert from CamelCase to under_scored
88 | name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', node_kind)
89 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower()
90 |
91 | def get_handler(self, node_kind, prefix):
92 | name = self.get_handler_name(node_kind)
93 | name = '_'.join((prefix, name))
94 | try:
95 | return getattr(self, name)
96 | except AttributeError:
97 | raise NodeDispatchError('No handler found for node kind: %s (expected: %s)' %
98 | (node_kind, name))
99 |
100 |
101 | class LayerAdapter(object):
102 |
103 | def __init__(self, layer, kind):
104 | self.layer = layer
105 | self.kind = kind
106 |
107 | @property
108 | def parameters(self):
109 | name = NodeDispatch.get_handler_name(self.kind)
110 | name = '_'.join((name, 'param'))
111 | try:
112 | return getattr(self.layer, name)
113 | except AttributeError:
114 | raise NodeDispatchError('Caffe parameters not found for layer kind: %s' % (self.kind))
115 |
116 | @staticmethod
117 | def get_kernel_value(scalar, repeated, idx, default=None):
118 | if scalar:
119 | return scalar
120 | if repeated:
121 | if isinstance(repeated, numbers.Number):
122 | return repeated
123 | if len(repeated) == 1:
124 | # Same value applies to all spatial dimensions
125 | return int(repeated[0])
126 | assert idx < len(repeated)
127 | # Extract the value for the given spatial dimension
128 | return repeated[idx]
129 | if default is None:
130 | raise ValueError('Unable to determine kernel parameter!')
131 | return default
132 |
133 | @property
134 | def kernel_parameters(self):
135 | assert self.kind in (NodeKind.Convolution, NodeKind.Pooling)
136 | params = self.parameters
137 | k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0)
138 | k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1)
139 | s_h = self.get_kernel_value(params.stride_h, params.stride, 0, default=1)
140 | s_w = self.get_kernel_value(params.stride_w, params.stride, 1, default=1)
141 | p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
142 |         p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)
143 | return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w)
144 |
145 |
146 | KernelParameters = namedtuple('KernelParameters', ['kernel_h', 'kernel_w', 'stride_h', 'stride_w',
147 | 'pad_h', 'pad_w'])
148 |
--------------------------------------------------------------------------------
/data_generation/refinement_network/options/base_options.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | from util import util
4 | import torch
5 |
6 |
7 | class BaseOptions():
8 | def __init__(self):
9 | self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
10 | self.initialized = False
11 |
12 | def initialize(self):
13 | self.parser.add_argument('--dataroot', required=True, help='path to images. '
14 | 'For train(aligned_dataset), should have subdir {img}/{prior}/{parsing}, and {prior} should have subdir containing n prior images for each image in {img}, named by the name of each image;'
15 | 'For test(single_dataset), should have subdir {img}/{prior}')
16 | self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size')
17 | self.parser.add_argument('--loadSize', type=int, default=286, help='scale images to this size')
18 | self.parser.add_argument('--fineSize', type=int, default=256, help='then crop to this size')
19 | self.parser.add_argument('--input_nc', type=int, default=6, help='# of input image channels')
20 | self.parser.add_argument('--output_nc', type=int, default=1, help='# of output image channels')
21 | self.parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in first conv layer')
22 | self.parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in first conv layer')
23 | self.parser.add_argument('--which_model_netD', type=str, default='basic', help='selects model to use for netD')
24 | self.parser.add_argument('--which_model_netG', type=str, default='unet_256', help='selects model to use for netG')
25 | self.parser.add_argument('--n_layers_D', type=int, default=3, help='only used if which_model_netD==n_layers')
26 |         self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids, e.g. 0 or 0,1,2 or 0,2; use -1 for CPU')
27 | self.parser.add_argument('--name', type=str, default='experiment_name', help='name of the experiment. It decides where to store samples and models')
28 | self.parser.add_argument('--dataset_mode', type=str, default='aligned_prior', help='chooses how datasets are loaded. [aligned | single]')
29 | self.parser.add_argument('--model', type=str, default='pix2pix', help='chooses which model to use. pix2pix, test')
30 | self.parser.add_argument('--which_direction', type=str, default='AtoB', help='AtoB or BtoA')
31 | self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data')
32 | self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here')
33 | self.parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization')
34 | self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly')
35 | self.parser.add_argument('--display_winsize', type=int, default=256, help='display window size')
36 | self.parser.add_argument('--display_id', type=int, default=0, help='window id of the web display')
37 | self.parser.add_argument('--display_port', type=int, default=8097, help='visdom port of the web display')
38 | self.parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator')
39 | self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.')
40 | self.parser.add_argument('--resize_or_crop', type=str, default='resize_and_crop', help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop]')
41 | self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation')
42 | self.parser.add_argument('--init_type', type=str, default='normal', help='network initialization [normal|xavier|kaiming|orthogonal]')
43 |
44 | self.initialized = True
45 |
46 | def parse(self):
47 | if not self.initialized:
48 | self.initialize()
49 | self.opt = self.parser.parse_args()
50 | self.opt.isTrain = self.isTrain # train or test
51 |
52 | str_ids = self.opt.gpu_ids.split(',')
53 | self.opt.gpu_ids = []
54 | for str_id in str_ids:
55 | id = int(str_id)
56 | if id >= 0:
57 | self.opt.gpu_ids.append(id)
58 |
59 | # set gpu ids
60 | if len(self.opt.gpu_ids) > 0:
61 | torch.cuda.set_device(self.opt.gpu_ids[0])
62 |
63 | args = vars(self.opt)
64 |
65 | print('------------ Options -------------')
66 | for k, v in sorted(args.items()):
67 | print('%s: %s' % (str(k), str(v)))
68 | print('-------------- End ----------------')
69 |
70 | # save to the disk
71 | expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name)
72 | util.mkdirs(expr_dir)
73 | file_name = os.path.join(expr_dir, 'opt.txt')
74 | with open(file_name, 'wt') as opt_file:
75 | opt_file.write('------------ Options -------------\n')
76 | for k, v in sorted(args.items()):
77 | opt_file.write('%s: %s\n' % (str(k), str(v)))
78 | opt_file.write('-------------- End ----------------\n')
79 | return self.opt
80 |
--------------------------------------------------------------------------------
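Usage note: a minimal sketch (not the repo's actual train_options.py or test_options.py) of how BaseOptions is specialized. A subclass adds its phase-specific flags in initialize() and sets isTrain, after which parse() resolves gpu_ids and writes opt.txt under checkpoints_dir/name. The --niter flag below is hypothetical.

    from options.base_options import BaseOptions

    class DemoTrainOptions(BaseOptions):
        def initialize(self):
            BaseOptions.initialize(self)
            # hypothetical phase-specific flag, for illustration only
            self.parser.add_argument('--niter', type=int, default=100, help='# of training epochs')
            self.isTrain = True

    # opt = DemoTrainOptions().parse()   # e.g. --dataroot ./datasets/people --name demo
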
/data_generation/refinement_network/util/visualizer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import ntpath
4 | import time
5 | from . import util
6 | from . import html
7 | from scipy.misc import imresize
8 |
9 |
10 | class Visualizer():
11 | def __init__(self, opt):
12 | # self.opt = opt
13 | self.display_id = opt.display_id
14 | self.use_html = opt.isTrain and not opt.no_html
15 | self.win_size = opt.display_winsize
16 | self.name = opt.name
17 | self.opt = opt
18 | self.saved = False
19 | if self.display_id > 0:
20 | import visdom
21 | self.vis = visdom.Visdom(port=opt.display_port)
22 |
23 | if self.use_html:
24 | self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web')
25 | self.img_dir = os.path.join(self.web_dir, 'images')
26 | print('create web directory %s...' % self.web_dir)
27 | util.mkdirs([self.web_dir, self.img_dir])
28 | self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt')
29 | with open(self.log_name, "a") as log_file:
30 | now = time.strftime("%c")
31 | log_file.write('================ Training Loss (%s) ================\n' % now)
32 |
33 | def reset(self):
34 | self.saved = False
35 |
36 | # |visuals|: dictionary of images to display or save
37 | def display_current_results(self, visuals, epoch, save_result):
38 | if self.display_id > 0: # show images in the browser
39 | ncols = self.opt.display_single_pane_ncols
40 | if ncols > 0:
41 | h, w = next(iter(visuals.values())).shape[:2]
42 | table_css = """<style>
43 | table {border-collapse: separate; border-spacing:4px; white-space:nowrap; text-align:center}
44 | table td {width: %dpx; height: %dpx; padding: 4px; outline: 4px solid black}
45 | </style>""" % (w, h)
46 | title = self.name
47 | label_html = ''
48 | label_html_row = ''
49 | nrows = int(np.ceil(len(visuals.items()) / ncols))
50 | images = []
51 | idx = 0
52 | for label, image_numpy in visuals.items():
53 | label_html_row += '<td>%s</td>' % label
54 | images.append(image_numpy.transpose([2, 0, 1]))
55 | idx += 1
56 | if idx % ncols == 0:
57 | label_html += '<tr>%s</tr>' % label_html_row
58 | label_html_row = ''
59 | white_image = np.ones_like(image_numpy.transpose([2, 0, 1])) * 255
60 | while idx % ncols != 0:
61 | images.append(white_image)
62 | label_html_row += '<td></td>'
63 | idx += 1
64 | if label_html_row != '':
65 | label_html += '<tr>%s</tr>' % label_html_row
66 | # pane col = image row
67 | self.vis.images(images, nrow=ncols, win=self.display_id + 1,
68 | padding=2, opts=dict(title=title + ' images'))
69 | label_html = '<table>%s</table>' % label_html
70 | self.vis.text(table_css + label_html, win=self.display_id + 2,
71 | opts=dict(title=title + ' labels'))
72 | else:
73 | idx = 1
74 | for label, image_numpy in visuals.items():
75 | self.vis.image(image_numpy.transpose([2, 0, 1]), opts=dict(title=label),
76 | win=self.display_id + idx)
77 | idx += 1
78 |
79 | if self.use_html and (save_result or not self.saved): # save images to a html file
80 | self.saved = True
81 | for label, image_numpy in visuals.items():
82 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label))
83 | util.save_image(image_numpy, img_path)
84 | # update website
85 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1)
86 | for n in range(epoch, 0, -1):
87 | webpage.add_header('epoch [%d]' % n)
88 | ims = []
89 | txts = []
90 | links = []
91 |
92 | for label, image_numpy in visuals.items():
93 | img_path = 'epoch%.3d_%s.png' % (n, label)
94 | ims.append(img_path)
95 | txts.append(label)
96 | links.append(img_path)
97 | webpage.add_images(ims, txts, links, width=self.win_size)
98 | webpage.save()
99 |
100 | # errors: dictionary of error labels and values
101 | def plot_current_errors(self, epoch, counter_ratio, opt, errors):
102 | if not hasattr(self, 'plot_data'):
103 | self.plot_data = {'X': [], 'Y': [], 'legend': list(errors.keys())}
104 | self.plot_data['X'].append(epoch + counter_ratio)
105 | self.plot_data['Y'].append([errors[k] for k in self.plot_data['legend']])
106 | self.vis.line(
107 | X=np.stack([np.array(self.plot_data['X'])] * len(self.plot_data['legend']), 1),
108 | Y=np.array(self.plot_data['Y']),
109 | opts={
110 | 'title': self.name + ' loss over time',
111 | 'legend': self.plot_data['legend'],
112 | 'xlabel': 'epoch',
113 | 'ylabel': 'loss'},
114 | win=self.display_id)
115 |
116 | # errors: same format as |errors| of plotCurrentErrors
117 | def print_current_errors(self, epoch, i, errors, t, t_data):
118 | message = '(epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % (epoch, i, t, t_data)
119 | for k, v in errors.items():
120 | message += '%s: %.3f ' % (k, v)
121 |
122 | print(message)
123 | with open(self.log_name, "a") as log_file:
124 | log_file.write('%s\n' % message)
125 |
126 | # save image to the disk
127 | def save_images(self, webpage, visuals, image_path, aspect_ratio=1.0):
128 | image_dir = webpage.get_image_dir()
129 | short_path = ntpath.basename(image_path[0])
130 | name = os.path.splitext(short_path)[0]
131 |
132 | webpage.add_header(name)
133 | ims = []
134 | txts = []
135 | links = []
136 |
137 | for label, im in visuals.items():
138 | image_name = '%s_%s.png' % (name, label)
139 | save_path = os.path.join(image_dir, image_name)
140 | h, w, _ = im.shape
141 | if aspect_ratio > 1.0:
142 | im = imresize(im, (h, int(w * aspect_ratio)), interp='bicubic')
143 | if aspect_ratio < 1.0:
144 | im = imresize(im, (int(h / aspect_ratio), w), interp='bicubic')
145 | util.save_image(im, save_path)
146 |
147 | ims.append(image_name)
148 | txts.append(label)
149 | links.append(image_name)
150 | webpage.add_images(ims, txts, links, width=self.win_size)
151 |
--------------------------------------------------------------------------------
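Usage note: a minimal sketch of the calling pattern the Visualizer expects; the Namespace below is a hypothetical stand-in for what BaseOptions.parse() normally provides, and the error values are made up.

    import argparse
    import os
    from util import util
    from util.visualizer import Visualizer

    opt = argparse.Namespace(display_id=0, isTrain=True, no_html=True, display_winsize=256,
                             name='demo', checkpoints_dir='./checkpoints', display_port=8097)
    util.mkdirs(os.path.join(opt.checkpoints_dir, opt.name))   # loss_log.txt is appended here
    vis = Visualizer(opt)
    vis.print_current_errors(epoch=1, i=10, errors={'G_L1': 0.42}, t=0.05, t_data=0.01)
    # With display_id > 0 and a running visdom server, plot_current_errors() and
    # display_current_results() push the same dictionaries to the browser instead.
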
/data_generation/refinement_network/models/pix2pix_model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from collections import OrderedDict
3 | from torch.autograd import Variable
4 | import util.util as util
5 | from util.image_pool import ImagePool
6 | from .base_model import BaseModel
7 | from . import networks
8 |
9 |
10 | class Pix2PixModel(BaseModel):
11 | def name(self):
12 | return 'Pix2PixModel'
13 |
14 | def initialize(self, opt):
15 | BaseModel.initialize(self, opt)
16 | self.isTrain = opt.isTrain
17 |
18 | # load/define networks
19 | self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf,
20 | opt.which_model_netG, opt.norm, not opt.no_dropout, opt.init_type, self.gpu_ids)
21 | if self.isTrain and (not opt.no_gan):
22 | use_sigmoid = opt.no_lsgan
23 | self.netD = networks.define_D(opt.input_nc + opt.output_nc, opt.ndf,
24 | opt.which_model_netD,
25 | opt.n_layers_D, opt.norm, use_sigmoid, opt.init_type, self.gpu_ids)
26 | if not self.isTrain or opt.continue_train:
27 | self.load_network(self.netG, 'G', opt.which_epoch)
28 | if self.isTrain and (not opt.no_gan):
29 | self.load_network(self.netD, 'D', opt.which_epoch)
30 |
31 | if self.isTrain:
32 | self.fake_AB_pool = ImagePool(opt.pool_size)
33 | # define loss functions
34 | self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor)
35 | if opt.use_l2:
36 | self.criterionL1 = torch.nn.MSELoss()
37 | else:
38 | self.criterionL1 = torch.nn.L1Loss()
39 |
40 | # initialize optimizers
41 | self.schedulers = []
42 | self.optimizers = []
43 | self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
44 | lr=opt.lr, betas=(opt.beta1, 0.999))
45 | self.optimizers.append(self.optimizer_G)
46 | if not opt.no_gan:
47 | self.optimizer_D = torch.optim.Adam(self.netD.parameters(),
48 | lr=opt.lr, betas=(opt.beta1, 0.999))
49 | self.optimizers.append(self.optimizer_D)
50 | for optimizer in self.optimizers:
51 | self.schedulers.append(networks.get_scheduler(optimizer, opt))
52 |
53 | print('---------- Networks initialized -------------')
54 | networks.print_network(self.netG)
55 | if self.isTrain and (not opt.no_gan):
56 | networks.print_network(self.netD)
57 | print('-----------------------------------------------')
58 |
59 | def set_input(self, input):
60 | AtoB = self.opt.which_direction == 'AtoB'
61 | input_A = input['A' if AtoB else 'B']
62 | input_B = input['B' if AtoB else 'A']
63 | if len(self.gpu_ids) > 0:
64 | input_A = input_A.cuda(self.gpu_ids[0], async=True)
65 | input_B = input_B.cuda(self.gpu_ids[0], async=True)
66 | self.input_A = input_A
67 | self.input_B = input_B
68 | self.image_paths = input['A_paths' if AtoB else 'B_paths']
69 |
70 | def forward(self):
71 | self.real_A = Variable(self.input_A)
72 | self.fake_B = self.netG(self.real_A)
73 | self.real_B = Variable(self.input_B)
74 |
75 | # no backprop gradients
76 | def test(self):
77 | self.real_A = Variable(self.input_A, volatile=True)
78 | self.fake_B = self.netG(self.real_A)
79 | self.real_B = Variable(self.input_B, volatile=True)
80 |
81 | # get image paths
82 | def get_image_paths(self):
83 | return self.image_paths
84 |
85 | def backward_D(self):
86 | # Fake
87 | # stop backprop to the generator by detaching fake_B
88 | fake_AB = self.fake_AB_pool.query(torch.cat((self.real_A, self.fake_B), 1).data)
89 | pred_fake = self.netD(fake_AB.detach())
90 | self.loss_D_fake = self.criterionGAN(pred_fake, False)
91 |
92 | # Real
93 | real_AB = torch.cat((self.real_A, self.real_B), 1)
94 | pred_real = self.netD(real_AB)
95 | self.loss_D_real = self.criterionGAN(pred_real, True)
96 |
97 | # Combined loss
98 | self.loss_D = (self.loss_D_fake + self.loss_D_real) * 0.5
99 |
100 | self.loss_D.backward()
101 |
102 | def backward_G(self):
103 | if not self.opt.no_gan:
104 | # First, G(A) should fake the discriminator
105 | fake_AB = torch.cat((self.real_A, self.fake_B), 1)
106 | pred_fake = self.netD(fake_AB)
107 | self.loss_G_GAN = self.criterionGAN(pred_fake, True)
108 | else:
109 | self.loss_G_GAN = 0
110 |
111 | # Second, G(A) = B
112 | self.loss_G_L1 = self.criterionL1(self.fake_B, self.real_B) * self.opt.lambda_A
113 |
114 | self.loss_G = self.loss_G_GAN + self.loss_G_L1
115 |
116 | self.loss_G.backward()
117 |
118 | def optimize_parameters(self):
119 | self.forward()
120 | if not self.opt.no_gan:
121 | self.optimizer_D.zero_grad()
122 | self.backward_D()
123 | self.optimizer_D.step()
124 |
125 | self.optimizer_G.zero_grad()
126 | self.backward_G()
127 | self.optimizer_G.step()
128 |
129 | def get_current_errors(self):
130 | if not self.opt.no_gan:
131 | return OrderedDict([('G_GAN', self.loss_G_GAN.data[0]),
132 | ('G_L1', self.loss_G_L1.data[0]),
133 | ('D_real', self.loss_D_real.data[0]),
134 | ('D_fake', self.loss_D_fake.data[0])
135 | ])
136 | else:
137 | return OrderedDict([
138 | ('G_L1', self.loss_G_L1.data[0])
139 | ])
140 |
141 | def get_current_visuals(self):
142 | real_A_img, real_A_prior = util.tensor2im(self.real_A.data)
143 | fake_B = util.tensor2im(self.fake_B.data)
144 | real_B = util.tensor2im(self.real_B.data)
145 | if self.opt.output_nc == 1:
146 | fake_B_postprocessed = util.postprocess_parsing(fake_B, self.isTrain)
147 | fake_B_color = util.paint_color(fake_B_postprocessed)
148 | real_B_color = util.paint_color(util.postprocess_parsing(real_B, self.isTrain))
149 | if self.opt.output_nc == 1:
150 | return OrderedDict([
151 | ('real_A_img', real_A_img),
152 | ('real_A_prior', real_A_prior),
153 | ('fake_B', fake_B),
154 | ('fake_B_postprocessed', fake_B_postprocessed),
155 | ('fake_B_color', fake_B_color),
156 | ('real_B', real_B),
157 | ('real_B_color', real_B_color)]
158 | )
159 | else:
160 | return OrderedDict([
161 | ('real_A_img', real_A_img),
162 | ('real_A_prior', real_A_prior),
163 | ('fake_B', fake_B),
164 | ('real_B', real_B)]
165 | )
166 |
167 | def save(self, label):
168 | self.save_network(self.netG, 'G', label, self.gpu_ids)
169 | if not self.opt.no_gan:
170 | self.save_network(self.netD, 'D', label, self.gpu_ids)
171 |
--------------------------------------------------------------------------------
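Usage note: a small, self-contained illustration (plain PyTorch tensors, not the repo's networks module) of the conditional input used in backward_D and backward_G: the discriminator always sees the condition A concatenated channel-wise with a real or generated B, which is why netD is built with input_nc + output_nc channels (6 + 1 under the defaults in base_options.py).

    import torch

    real_A = torch.randn(1, 6, 256, 256)    # input_nc = 6: image + prior channels
    fake_B = torch.randn(1, 1, 256, 256)    # output_nc = 1: predicted parsing map
    fake_AB = torch.cat((real_A, fake_B), 1)
    print(fake_AB.shape)                     # torch.Size([1, 7, 256, 256])
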
/parsing_network/deeplab_resnet/utils.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | import numpy as np
3 | import tensorflow as tf
4 |
5 | # colour map
6 | label_colours = [(0,0,0)
7 | # 0=background
8 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
9 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
10 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
11 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
12 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
13 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
14 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)
15 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
16 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
17 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
18 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
19 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
20 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
21 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)
22 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
23 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
24 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
25 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
26 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
27 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
28 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)
29 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
30 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
31 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
32 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
33 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
34 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
35 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)
36 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
37 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
38 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
39 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
40 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
41 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
42 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)
43 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
44 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
45 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
46 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
47 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
48 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
49 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)
50 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
51 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
52 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
53 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
54 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
55 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
56 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)
57 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
58 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
59 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
60 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
61 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
62 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
63 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)
64 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
65 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
66 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
67 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
68 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
69 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
70 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)]
71 | # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
72 |
73 | def decode_labels(mask, num_images=1, num_classes=21):
74 | """Decode batch of segmentation masks.
75 |
76 | Args:
77 | mask: result of inference after taking argmax.
78 | num_images: number of images to decode from the batch.
79 | num_classes: number of classes to predict (including background).
80 |
81 | Returns:
82 | A batch with num_images RGB images of the same size as the input.
83 | """
84 | n, h, w, c = mask.shape
85 | assert(n >= num_images), 'Batch size %d should be greater than or equal to the number of images to save (%d).' % (n, num_images)
86 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8)
87 | for i in range(num_images):
88 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i])))
89 | pixels = img.load()
90 | for j_, j in enumerate(mask[i, :, :, 0]):
91 | for k_, k in enumerate(j):
92 | if k < num_classes:
93 | pixels[k_,j_] = label_colours[k]
94 | outputs[i] = np.array(img)
95 | return outputs
96 |
97 | def prepare_label(input_batch, new_size, num_classes, one_hot=True):
98 | """Resize masks and perform one-hot encoding.
99 |
100 | Args:
101 | input_batch: input tensor of shape [batch_size H W 1].
102 | new_size: a tensor with new height and width.
103 | num_classes: number of classes to predict (including background).
104 | one_hot: whether to perform one-hot encoding.
105 |
106 | Returns:
107 | Outputs a tensor of shape [batch_size h w num_classes]
108 | with last dimension comprised of 0's and 1's only.
109 | """
110 | with tf.name_scope('label_encode'):
111 | input_batch = tf.image.resize_nearest_neighbor(input_batch, new_size) # as labels are integer numbers, need to use NN interp.
112 | input_batch = tf.squeeze(input_batch, squeeze_dims=[3]) # reducing the channel dimension.
113 | if one_hot:
114 | input_batch = tf.one_hot(input_batch, depth=num_classes)
115 | return input_batch
116 |
117 | def inv_preprocess(imgs, num_images, img_mean):
118 | """Inverse preprocessing of the batch of images.
119 | Add the mean vector and convert from BGR to RGB.
120 |
121 | Args:
122 | imgs: batch of input images.
123 | num_images: number of images to apply the inverse transformations on.
124 | img_mean: vector of mean colour values.
125 |
126 | Returns:
127 | The batch of the size num_images with the same spatial dimensions as the input.
128 | """
129 | n, h, w, c = imgs.shape
130 | assert(n >= num_images), 'Batch size %d should be greater than or equal to the number of images to save (%d).' % (n, num_images)
131 | outputs = np.zeros((num_images, h, w, c), dtype=np.uint8)
132 | for i in range(num_images):
133 | outputs[i] = (imgs[i] + img_mean)[:, :, ::-1].astype(np.uint8)
134 | return outputs
135 |
--------------------------------------------------------------------------------
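Usage note: a minimal, self-contained check of decode_labels (run from parsing_network/ so that deeplab_resnet is importable): a 1x2x2x1 argmax mask is painted with the palette above.

    import numpy as np
    from deeplab_resnet.utils import decode_labels

    mask = np.array([[[[0], [15]],
                      [[15], [0]]]], dtype=np.uint8)      # 0 = background, 15 = person
    rgb = decode_labels(mask, num_images=1, num_classes=21)
    print(rgb.shape)      # (1, 2, 2, 3)
    print(rgb[0, 0, 1])   # [192 128 128], the colour assigned to class 15
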
/parsing_network/deeplab_resnet/image_reader.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import tensorflow as tf
5 |
6 | def image_scaling(img, label):
7 | """
8 | Randomly scales the images between 0.5 and 1.5 times the original size.
9 |
10 | Args:
11 | img: Training image to scale.
12 | label: Segmentation mask to scale.
13 | """
14 |
15 | scale = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None)
16 | h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale))
17 | w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale))
18 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1])
19 | img = tf.image.resize_images(img, new_shape)
20 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape)
21 | label = tf.squeeze(label, squeeze_dims=[0])
22 |
23 | return img, label
24 |
25 | def image_mirroring(img, label):
26 | """
27 | Randomly mirrors the images.
28 |
29 | Args:
30 | img: Training image to mirror.
31 | label: Segmentation mask to mirror.
32 | """
33 |
34 | distort_left_right_random = tf.random_uniform([1], 0, 1.0, dtype=tf.float32)[0]
35 | mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5)
36 | mirror = tf.boolean_mask([0, 1, 2], mirror)
37 | img = tf.reverse(img, mirror)
38 | label = tf.reverse(label, mirror)
39 | return img, label
40 |
41 | def random_crop_and_pad_image_and_labels(image, label, crop_h, crop_w, ignore_label=255):
42 | """
43 | Randomly crops and pads the input images.
44 |
45 | Args:
46 | image: Training image to crop/pad.
47 | label: Segmentation mask to crop/pad.
48 | crop_h: Height of cropped segment.
49 | crop_w: Width of cropped segment.
50 | ignore_label: Label to ignore during the training.
51 | """
52 |
53 | label = tf.cast(label, dtype=tf.float32)
54 | label = label - ignore_label # Needs to be subtracted and later added due to 0 padding.
55 | combined = tf.concat(axis=2, values=[image, label])
56 | image_shape = tf.shape(image)
57 | combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0, tf.maximum(crop_h, image_shape[0]), tf.maximum(crop_w, image_shape[1]))
58 |
59 | last_image_dim = tf.shape(image)[-1]
60 | last_label_dim = tf.shape(label)[-1]
61 | combined_crop = tf.random_crop(combined_pad, [crop_h,crop_w,4])
62 | img_crop = combined_crop[:, :, :last_image_dim]
63 | label_crop = combined_crop[:, :, last_image_dim:]
64 | label_crop = label_crop + ignore_label
65 | label_crop = tf.cast(label_crop, dtype=tf.uint8)
66 |
67 | # Set static shape so that tensorflow knows shape at compile time.
68 | img_crop.set_shape((crop_h, crop_w, 3))
69 | label_crop.set_shape((crop_h,crop_w, 1))
70 | return img_crop, label_crop
71 |
72 | def read_labeled_image_list(data_dir, data_list):
73 | """Reads txt file containing paths to images and ground truth masks.
74 |
75 | Args:
76 | data_dir: path to the directory with images and masks.
77 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
78 |
79 | Returns:
80 | Two lists with all file names for images and masks, respectively.
81 | """
82 | f = open(data_list, 'r')
83 | images = []
84 | masks = []
85 | for line in f:
86 | try:
87 | image, mask = line.strip("\n").split(' ')
88 | except ValueError: # Adhoc for test.
89 | image = mask = line.strip("\n")
90 | images.append(data_dir + image)
91 | masks.append(data_dir + mask)
92 | return images, masks
93 |
94 | def read_images_from_disk(input_queue, input_size, random_scale, random_mirror, ignore_label, img_mean): # optional pre-processing arguments
95 | """Read one image and its corresponding mask with optional pre-processing.
96 |
97 | Args:
98 | input_queue: tf queue with paths to the image and its mask.
99 | input_size: a tuple with (height, width) values.
100 | If not given, return images of original size.
101 | random_scale: whether to randomly scale the images prior
102 | to random crop.
103 | random_mirror: whether to randomly mirror the images prior
104 | to random crop.
105 | ignore_label: index of label to ignore during the training.
106 | img_mean: vector of mean colour values.
107 |
108 | Returns:
109 | Two tensors: the decoded image and its mask.
110 | """
111 |
112 | img_contents = tf.read_file(input_queue[0])
113 | label_contents = tf.read_file(input_queue[1])
114 |
115 | img = tf.image.decode_jpeg(img_contents, channels=3)
116 | img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img)
117 | img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32)
118 | # Extract mean.
119 | img -= img_mean
120 |
121 | label = tf.image.decode_png(label_contents, channels=1)
122 |
123 | if input_size is not None:
124 | h, w = input_size
125 |
126 | # Randomly scale the images and labels.
127 | if random_scale:
128 | img, label = image_scaling(img, label)
129 |
130 | # Randomly mirror the images and labels.
131 | if random_mirror:
132 | img, label = image_mirroring(img, label)
133 |
134 | # Randomly crops the images and labels.
135 | img, label = random_crop_and_pad_image_and_labels(img, label, h, w, ignore_label)
136 |
137 | return img, label
138 |
139 | class ImageReader(object):
140 | '''Generic ImageReader which reads images and corresponding segmentation
141 | masks from the disk, and enqueues them into a TensorFlow queue.
142 | '''
143 |
144 | def __init__(self, data_dir, data_list, input_size,
145 | random_scale, random_mirror, ignore_label, img_mean, coord):
146 | '''Initialise an ImageReader.
147 |
148 | Args:
149 | data_dir: path to the directory with images and masks.
150 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
151 | input_size: a tuple with (height, width) values, to which all the images will be resized.
152 | random_scale: whether to randomly scale the images prior to random crop.
153 | random_mirror: whether to randomly mirror the images prior to random crop.
154 | ignore_label: index of label to ignore during the training.
155 | img_mean: vector of mean colour values.
156 | coord: TensorFlow queue coordinator.
157 | '''
158 | self.data_dir = data_dir
159 | self.data_list = data_list
160 | self.input_size = input_size
161 | self.coord = coord
162 |
163 | self.image_list, self.label_list = read_labeled_image_list(self.data_dir, self.data_list)
164 | self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string)
165 | self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string)
166 | self.queue = tf.train.slice_input_producer([self.images, self.labels],
167 | shuffle=input_size is not None) # not shuffling if it is val
168 | self.image, self.label = read_images_from_disk(self.queue, self.input_size, random_scale, random_mirror, ignore_label, img_mean)
169 |
170 | def dequeue(self, num_elements):
171 | '''Pack images and labels into a batch.
172 |
173 | Args:
174 | num_elements: the batch size.
175 |
176 | Returns:
177 | Two tensors of size (batch_size, h, w, {3, 1}) for images and masks.'''
178 | image_batch, label_batch = tf.train.batch([self.image, self.label],
179 | num_elements)
180 | return image_batch, label_batch
181 |
--------------------------------------------------------------------------------
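Usage note: a small, self-contained illustration of read_labeled_image_list, which ImageReader uses to pair image and mask paths; the list file contents and data_dir below are hypothetical.

    from deeplab_resnet.image_reader import read_labeled_image_list

    with open('/tmp/demo_list.txt', 'w') as f:
        f.write('/JPEGImages/2007_000129.jpg /SegmentationClass/2007_000129.png\n')

    images, masks = read_labeled_image_list(data_dir='/data/VOC2012', data_list='/tmp/demo_list.txt')
    print(images[0])   # /data/VOC2012/JPEGImages/2007_000129.jpg
    print(masks[0])    # /data/VOC2012/SegmentationClass/2007_000129.png
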
/parsing_network/kaffe/tensorflow/transformer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ..errors import KaffeError, print_stderr
4 | from ..graph import GraphBuilder, NodeMapper
5 | from ..layers import NodeKind
6 | from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser,
7 | BatchNormScaleBiasFuser, BatchNormPreprocessor, ParameterNamer)
8 |
9 | from . import network
10 |
11 |
12 | def get_padding_type(kernel_params, input_shape, output_shape):
13 | '''Translates Caffe's numeric padding to one of ('SAME', 'VALID').
14 | Caffe supports arbitrary padding values, while TensorFlow only
15 | supports 'SAME' and 'VALID' modes. So, not all Caffe paddings
16 | can be translated to TensorFlow. There are some subtleties to
17 | how the padding edge-cases are handled. These are described here:
18 | https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto
19 | '''
20 | k_h, k_w, s_h, s_w, p_h, p_w = kernel_params
21 | s_o_h = np.ceil(input_shape.height / float(s_h))
22 | s_o_w = np.ceil(input_shape.width / float(s_w))
23 | if (output_shape.height == s_o_h) and (output_shape.width == s_o_w):
24 | return 'SAME'
25 | v_o_h = np.ceil((input_shape.height - k_h + 1.0) / float(s_h))
26 | v_o_w = np.ceil((input_shape.width - k_w + 1.0) / float(s_w))
27 | if (output_shape.height == v_o_h) and (output_shape.width == v_o_w):
28 | return 'VALID'
29 | return None
30 |
31 |
32 | class TensorFlowNode(object):
33 | '''An intermediate representation for TensorFlow operations.'''
34 |
35 | def __init__(self, op, *args, **kwargs):
36 | # A string corresponding to the TensorFlow operation
37 | self.op = op
38 | # Positional arguments for the operation
39 | self.args = args
40 | # Keyword arguments for the operation
41 | self.kwargs = list(kwargs.items())
42 | # The source Caffe node
43 | self.node = None
44 |
45 | def format(self, arg):
46 | '''Returns a string representation for the given value.'''
47 | return "'%s'" % arg if isinstance(arg, basestring) else str(arg)
48 |
49 | def pair(self, key, value):
50 | '''Returns key=formatted(value).'''
51 | return '%s=%s' % (key, self.format(value))
52 |
53 | def emit(self):
54 | '''Emits the Python source for this node.'''
55 | # Format positional arguments
56 | args = map(self.format, self.args)
57 | # Format any keyword arguments
58 | if self.kwargs:
59 | args += [self.pair(k, v) for k, v in self.kwargs]
60 | # Set the node name
61 | args.append(self.pair('name', self.node.name))
62 | args = ', '.join(args)
63 | return '%s(%s)' % (self.op, args)
64 |
65 |
66 | class MaybeActivated(object):
67 |
68 | def __init__(self, node, default=True):
69 | self.inject_kwargs = {}
70 | if node.metadata.get('relu', False) != default:
71 | self.inject_kwargs['relu'] = not default
72 |
73 | def __call__(self, *args, **kwargs):
74 | kwargs.update(self.inject_kwargs)
75 | return TensorFlowNode(*args, **kwargs)
76 |
77 |
78 | class TensorFlowMapper(NodeMapper):
79 |
80 | def get_kernel_params(self, node):
81 | kernel_params = node.layer.kernel_parameters
82 | input_shape = node.get_only_parent().output_shape
83 | padding = get_padding_type(kernel_params, input_shape, node.output_shape)
84 | # Only emit the padding if it's not the default value.
85 | padding = {'padding': padding} if padding != network.DEFAULT_PADDING else {}
86 | return (kernel_params, padding)
87 |
88 | def map_convolution(self, node):
89 | (kernel_params, kwargs) = self.get_kernel_params(node)
90 | h = kernel_params.kernel_h
91 | w = kernel_params.kernel_w
92 | c_o = node.output_shape[1]
93 | c_i = node.parents[0].output_shape[1]
94 | group = node.parameters.group
95 | if group != 1:
96 | kwargs['group'] = group
97 | if not node.parameters.bias_term:
98 | kwargs['biased'] = False
99 | assert kernel_params.kernel_h == h
100 | assert kernel_params.kernel_w == w
101 | return MaybeActivated(node)('conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o,
102 | kernel_params.stride_h, kernel_params.stride_w, **kwargs)
103 |
104 | def map_relu(self, node):
105 | return TensorFlowNode('relu')
106 |
107 | def map_pooling(self, node):
108 | pool_type = node.parameters.pool
109 | if pool_type == 0:
110 | pool_op = 'max_pool'
111 | elif pool_type == 1:
112 | pool_op = 'avg_pool'
113 | else:
114 | # Stochastic pooling, for instance.
115 | raise KaffeError('Unsupported pooling type.')
116 | (kernel_params, padding) = self.get_kernel_params(node)
117 | return TensorFlowNode(pool_op, kernel_params.kernel_h, kernel_params.kernel_w,
118 | kernel_params.stride_h, kernel_params.stride_w, **padding)
119 |
120 | def map_inner_product(self, node):
121 | #TODO: Axis
122 | assert node.parameters.axis == 1
123 | #TODO: Unbiased
124 | assert node.parameters.bias_term == True
125 | return MaybeActivated(node)('fc', node.parameters.num_output)
126 |
127 | def map_softmax(self, node):
128 | return TensorFlowNode('softmax')
129 |
130 | def map_lrn(self, node):
131 | params = node.parameters
132 | # The window size must be an odd value. For a window
133 | # size of (2*n+1), TensorFlow defines depth_radius = n.
134 | assert params.local_size % 2 == 1
135 | # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow
136 | # just scales by alpha (as does Krizhevsky's paper).
137 | # We'll account for that here.
138 | alpha = params.alpha / float(params.local_size)
139 | return TensorFlowNode('lrn', int(params.local_size / 2), alpha, params.beta)
140 |
141 | def map_concat(self, node):
142 | axis = (2, 3, 1, 0)[node.parameters.axis]
143 | return TensorFlowNode('concat', axis)
144 |
145 | def map_dropout(self, node):
146 | return TensorFlowNode('dropout', node.parameters.dropout_ratio)
147 |
148 | def map_batch_norm(self, node):
149 | scale_offset = len(node.data) == 4
150 | kwargs = {'is_training': True} if scale_offset else {'is_training': True, 'scale': False}
151 | return MaybeActivated(node, default=False)('batch_normalization', **kwargs)
152 |
153 | def map_eltwise(self, node):
154 | operations = {0: 'multiply', 1: 'add', 2: 'max'}
155 | op_code = node.parameters.operation
156 | try:
157 | return TensorFlowNode(operations[op_code])
158 | except KeyError:
159 | raise KaffeError('Unknown elementwise operation: {}'.format(op_code))
160 |
161 | def commit(self, chains):
162 | return chains
163 |
164 |
165 | class TensorFlowEmitter(object):
166 |
167 | def __init__(self, tab=None):
168 | self.tab = tab or ' ' * 4
169 | self.prefix = ''
170 |
171 | def indent(self):
172 | self.prefix += self.tab
173 |
174 | def outdent(self):
175 | self.prefix = self.prefix[:-len(self.tab)]
176 |
177 | def statement(self, s):
178 | return self.prefix + s + '\n'
179 |
180 | def emit_imports(self):
181 | return self.statement('from kaffe.tensorflow import Network\n')
182 |
183 | def emit_class_def(self, name):
184 | return self.statement('class %s(Network):' % (name))
185 |
186 | def emit_setup_def(self):
187 | return self.statement('def setup(self):')
188 |
189 | def emit_parents(self, chain):
190 | assert len(chain)
191 | s = '(self.feed('
192 | sep = ', \n' + self.prefix + (' ' * len(s))
193 | s += sep.join(["'%s'" % parent.name for parent in chain[0].node.parents])
194 | return self.statement(s + ')')
195 |
196 | def emit_node(self, node):
197 | return self.statement(' ' * 5 + '.' + node.emit())
198 |
199 | def emit(self, name, chains):
200 | s = self.emit_imports()
201 | s += self.emit_class_def(name)
202 | self.indent()
203 | s += self.emit_setup_def()
204 | self.indent()
205 | blocks = []
206 | for chain in chains:
207 | b = ''
208 | b += self.emit_parents(chain)
209 | for node in chain:
210 | b += self.emit_node(node)
211 | blocks.append(b[:-1] + ')')
212 | s = s + '\n\n'.join(blocks)
213 | return s
214 |
215 |
216 | class TensorFlowTransformer(object):
217 |
218 | def __init__(self, def_path, data_path, verbose=True, phase='test'):
219 | self.verbose = verbose
220 | self.phase = phase
221 | self.load(def_path, data_path, phase)
222 | self.params = None
223 | self.source = None
224 |
225 | def load(self, def_path, data_path, phase):
226 | # Build the graph
227 | graph = GraphBuilder(def_path, phase).build()
228 |
229 | if data_path is not None:
230 | # Load and associate learned parameters
231 | graph = DataInjector(def_path, data_path)(graph)
232 |
233 | # Transform the graph
234 | transformers = [
235 | # Fuse split batch normalization layers
236 | BatchNormScaleBiasFuser(),
237 |
238 | # Fuse ReLUs
239 | # TODO: Move non-linearity application to layer wrapper, allowing
240 | # any arbitrary operation to be optionally activated.
241 | ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct,
242 | NodeKind.BatchNorm]),
243 |
244 | # Rename nodes
245 | # Slashes are used for scoping in TensorFlow. Replace slashes
246 | # in node names with underscores.
247 | # (Caffe's GoogLeNet implementation uses slashes)
248 | NodeRenamer(lambda node: node.name.replace('/', '_'))
249 | ]
250 | self.graph = graph.transformed(transformers)
251 |
252 | # Display the graph
253 | if self.verbose:
254 | print_stderr(self.graph)
255 |
256 | def transform_data(self):
257 | if self.params is None:
258 | transformers = [
259 |
260 | # Reshape the parameters to TensorFlow's ordering
261 | DataReshaper({
262 | # (c_o, c_i, h, w) -> (h, w, c_i, c_o)
263 | NodeKind.Convolution: (2, 3, 1, 0),
264 |
265 | # (c_o, c_i) -> (c_i, c_o)
266 | NodeKind.InnerProduct: (1, 0)
267 | }),
268 |
269 | # Pre-process batch normalization data
270 | BatchNormPreprocessor(),
271 |
272 | # Convert parameters to dictionaries
273 | ParameterNamer(),
274 | ]
275 | self.graph = self.graph.transformed(transformers)
276 | self.params = {node.name: node.data for node in self.graph.nodes if node.data}
277 | return self.params
278 |
279 | def transform_source(self):
280 | if self.source is None:
281 | mapper = TensorFlowMapper(self.graph)
282 | chains = mapper.map()
283 | emitter = TensorFlowEmitter()
284 | self.source = emitter.emit(self.graph.name, chains)
285 | return self.source
286 |
--------------------------------------------------------------------------------
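Usage note: a sketch of how the transformer is typically driven; the Caffe definition and weight files below are placeholders, and the surrounding conversion script is not part of this snapshot. transform_data() yields the weight dictionary that Network.load() expects, and transform_source() yields the generated Network subclass.

    import numpy as np
    from kaffe.tensorflow.transformer import TensorFlowTransformer

    # Hypothetical Caffe inputs; any deploy prototxt + caffemodel pair would do.
    transformer = TensorFlowTransformer('deploy.prototxt', 'weights.caffemodel', phase='test')

    np.save('weights.npy', transformer.transform_data())     # parameters for Network.load()
    with open('generated_net.py', 'w') as f:
        f.write(transformer.transform_source())              # emitted Network subclass source
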
/parsing_network/kaffe/transformers.py:
--------------------------------------------------------------------------------
1 | '''
2 | A collection of graph transforms.
3 |
4 | A transformer is a callable that accepts a graph and returns a transformed version.
5 | '''
6 |
7 | import numpy as np
8 |
9 | from .caffe import get_caffe_resolver, has_pycaffe
10 | from .errors import KaffeError, print_stderr
11 | from .layers import NodeKind
12 |
13 |
14 | class DataInjector(object):
15 | '''
16 | Associates parameters loaded from a .caffemodel file with their corresponding nodes.
17 | '''
18 |
19 | def __init__(self, def_path, data_path):
20 | # The .prototxt file defining the graph
21 | self.def_path = def_path
22 | # The .caffemodel file containing the learned parameters
23 | self.data_path = data_path
24 | # Set to true if the fallback protocol-buffer based backend was used
25 | self.did_use_pb = False
26 | # A list containing (layer name, parameters) tuples
27 | self.params = None
28 | # Load the parameters
29 | self.load()
30 |
31 | def load(self):
32 | if has_pycaffe():
33 | self.load_using_caffe()
34 | else:
35 | self.load_using_pb()
36 |
37 | def load_using_caffe(self):
38 | caffe = get_caffe_resolver().caffe
39 | net = caffe.Net(self.def_path, self.data_path, caffe.TEST)
40 | data = lambda blob: blob.data
41 | self.params = [(k, map(data, v)) for k, v in net.params.items()]
42 |
43 | def load_using_pb(self):
44 | data = get_caffe_resolver().NetParameter()
45 | data.MergeFromString(open(self.data_path, 'rb').read())
46 | pair = lambda layer: (layer.name, self.normalize_pb_data(layer))
47 | layers = data.layers or data.layer
48 | self.params = [pair(layer) for layer in layers if layer.blobs]
49 | self.did_use_pb = True
50 |
51 | def normalize_pb_data(self, layer):
52 | transformed = []
53 | for blob in layer.blobs:
54 | if len(blob.shape.dim):
55 | dims = blob.shape.dim
56 | c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims))
57 | else:
58 | c_o = blob.num
59 | c_i = blob.channels
60 | h = blob.height
61 | w = blob.width
62 | data = np.array(blob.data, dtype=np.float32).reshape(c_o, c_i, h, w)
63 | transformed.append(data)
64 | return transformed
65 |
66 | def adjust_parameters(self, node, data):
67 | if not self.did_use_pb:
68 | return data
69 | # When using the protobuf-backend, each parameter initially has four dimensions.
70 | # In certain cases (like FC layers), we want to eliminate the singleton dimensions.
71 | # This implementation takes care of the common cases. However, it does leave the
72 | # potential for future issues.
73 | # The Caffe-backend does not suffer from this problem.
74 | data = list(data)
75 | squeeze_indices = [1] # Squeeze biases.
76 | if node.kind == NodeKind.InnerProduct:
77 | squeeze_indices.append(0) # Squeeze FC.
78 | for idx in squeeze_indices:
79 | data[idx] = np.squeeze(data[idx])
80 | return data
81 |
82 | def __call__(self, graph):
83 | for layer_name, data in self.params:
84 | if layer_name in graph:
85 | node = graph.get_node(layer_name)
86 | node.data = self.adjust_parameters(node, data)
87 | else:
88 | print_stderr('Ignoring parameters for non-existent layer: %s' % layer_name)
89 | return graph
90 |
91 |
92 | class DataReshaper(object):
93 |
94 | def __init__(self, mapping, replace=True):
95 | # A dictionary mapping NodeKind to the transposed order.
96 | self.mapping = mapping
97 | # The node kinds eligible for reshaping
98 | self.reshaped_node_types = self.mapping.keys()
99 | # If true, the reshaped data will replace the old one.
100 | # Otherwise, it's set to the reshaped_data attribute.
101 | self.replace = replace
102 |
103 | def has_spatial_parent(self, node):
104 | try:
105 | parent = node.get_only_parent()
106 | s = parent.output_shape
107 | return s.height > 1 or s.width > 1
108 | except KaffeError:
109 | return False
110 |
111 | def map(self, node_kind):
112 | try:
113 | return self.mapping[node_kind]
114 | except KeyError:
115 | raise KaffeError('Ordering not found for node kind: {}'.format(node_kind))
116 |
117 | def __call__(self, graph):
118 | for node in graph.nodes:
119 | if node.data is None:
120 | continue
121 | if node.kind not in self.reshaped_node_types:
122 | # Check for 2+ dimensional data
123 | if any(len(tensor.shape) > 1 for tensor in node.data):
124 | print_stderr('Warning: parameters not reshaped for node: {}'.format(node))
125 | continue
126 | transpose_order = self.map(node.kind)
127 | weights = node.data[0]
128 | if (node.kind == NodeKind.InnerProduct) and self.has_spatial_parent(node):
129 | # The FC layer connected to the spatial layer needs to be
130 | # re-wired to match the new spatial ordering.
131 | in_shape = node.get_only_parent().output_shape
132 | fc_shape = weights.shape
133 | output_channels = fc_shape[0]
134 | weights = weights.reshape((output_channels, in_shape.channels, in_shape.height,
135 | in_shape.width))
136 | weights = weights.transpose(self.map(NodeKind.Convolution))
137 | node.reshaped_data = weights.reshape(fc_shape[transpose_order[0]],
138 | fc_shape[transpose_order[1]])
139 | else:
140 | node.reshaped_data = weights.transpose(transpose_order)
141 |
142 | if self.replace:
143 | for node in graph.nodes:
144 | if hasattr(node, 'reshaped_data'):
145 | # Set the weights
146 | node.data[0] = node.reshaped_data
147 | del node.reshaped_data
148 | return graph
149 |
150 |
151 | class SubNodeFuser(object):
152 | '''
153 | An abstract helper for merging a single-child with its single-parent.
154 | '''
155 |
156 | def __call__(self, graph):
157 | nodes = graph.nodes
158 | fused_nodes = []
159 | for node in nodes:
160 | if len(node.parents) != 1:
161 | # We're only fusing nodes with single parents
162 | continue
163 | parent = node.get_only_parent()
164 | if len(parent.children) != 1:
165 | # We can only fuse a node if its parent's
166 | # value isn't used by any other node.
167 | continue
168 | if not self.is_eligible_pair(parent, node):
169 | continue
170 | # Rewrite the fused node's children to its parent.
171 | for child in node.children:
172 | child.parents.remove(node)
173 | parent.add_child(child)
174 | # Disconnect the fused node from the graph.
175 | parent.children.remove(node)
176 | fused_nodes.append(node)
177 | # Let the sub-class merge the fused node in any arbitrary way.
178 | self.merge(parent, node)
179 | transformed_nodes = [node for node in nodes if node not in fused_nodes]
180 | return graph.replaced(transformed_nodes)
181 |
182 | def is_eligible_pair(self, parent, child):
183 | '''Returns true if this parent/child pair is eligible for fusion.'''
184 | raise NotImplementedError('Must be implemented by subclass.')
185 |
186 | def merge(self, parent, child):
187 | '''Merge the child node into the parent.'''
188 | raise NotImplementedError('Must be implemented by subclass')
189 |
190 |
191 | class ReLUFuser(SubNodeFuser):
192 | '''
193 | Fuses rectified linear units with their parent nodes.
194 | '''
195 |
196 | def __init__(self, allowed_parent_types=None):
197 | # Fuse ReLUs when the parent node is one of the given types.
198 | # If None, all node types are eligible.
199 | self.allowed_parent_types = allowed_parent_types
200 |
201 | def is_eligible_pair(self, parent, child):
202 | return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and
203 | child.kind == NodeKind.ReLU)
204 |
205 | def merge(self, parent, _):
206 | parent.metadata['relu'] = True
207 |
208 |
209 | class BatchNormScaleBiasFuser(SubNodeFuser):
210 | '''
211 | The original batch normalization paper includes two learned
212 | parameters: a scaling factor \gamma and a bias \beta.
213 | Caffe's implementation does not include these two. However, it is commonly
214 | replicated by adding a scaling+bias layer immediately after the batch norm.
215 |
216 | This fuser merges the scaling+bias layer with the batch norm.
217 | '''
218 |
219 | def is_eligible_pair(self, parent, child):
220 | return (parent.kind == NodeKind.BatchNorm and child.kind == NodeKind.Scale and
221 | child.parameters.axis == 1 and child.parameters.bias_term == True)
222 |
223 | def merge(self, parent, child):
224 | parent.scale_bias_node = child
225 |
226 |
227 | class BatchNormPreprocessor(object):
228 | '''
229 | Prescale batch normalization parameters.
230 | Concatenate gamma (scale) and beta (bias) terms if set.
231 | '''
232 |
233 | def __call__(self, graph):
234 | for node in graph.nodes:
235 | if node.kind != NodeKind.BatchNorm:
236 | continue
237 | assert node.data is not None
238 | assert len(node.data) == 3
239 | mean, variance, scale = node.data
240 | # Prescale the stats
241 | scaling_factor = 1.0 / scale if scale != 0 else 0
242 | mean *= scaling_factor
243 | variance *= scaling_factor
244 | # Replace with the updated values
245 | node.data = [mean, variance]
246 | if hasattr(node, 'scale_bias_node'):
247 | # Include the scale and bias terms
248 | gamma, beta = node.scale_bias_node.data
249 | node.data += [gamma, beta]
250 | return graph
251 |
252 |
253 | class NodeRenamer(object):
254 | '''
255 | Renames nodes in the graph using a given unary function that
256 | accepts a node and returns its new name.
257 | '''
258 |
259 | def __init__(self, renamer):
260 | self.renamer = renamer
261 |
262 | def __call__(self, graph):
263 | for node in graph.nodes:
264 | node.name = self.renamer(node)
265 | return graph
266 |
267 |
268 | class ParameterNamer(object):
269 | '''
270 | Convert layer data arrays to a dictionary mapping parameter names to their values.
271 | '''
272 |
273 | def __call__(self, graph):
274 | for node in graph.nodes:
275 | if node.data is None:
276 | continue
277 | if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct):
278 | names = ('weights',)
279 | if node.parameters.bias_term:
280 | names += ('biases',)
281 | elif node.kind == NodeKind.BatchNorm:
282 | names = ('moving_mean', 'moving_variance')
283 | if len(node.data) == 4:
284 | names += ('gamma', 'beta')
285 | else:
286 | print_stderr('WARNING: Unhandled parameters: {}'.format(node.kind))
287 | continue
288 | assert len(names) == len(node.data)
289 | node.data = dict(zip(names, node.data))
290 | return graph
291 |
--------------------------------------------------------------------------------
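Usage note: a tiny demonstration of one of these transformers in isolation; the real pipeline builds the graph from a .prototxt via GraphBuilder, as shown in transformer.py above, so the hand-built single-node graph here is purely illustrative.

    from kaffe.graph import Graph, Node
    from kaffe.layers import NodeKind
    from kaffe.transformers import NodeRenamer

    g = Graph(nodes=[Node('inception_3a/1x1', NodeKind.Convolution)], name='demo')
    g = NodeRenamer(lambda node: node.name.replace('/', '_'))(g)
    print(g.nodes[0].name)   # inception_3a_1x1
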
/parsing_network/kaffe/tensorflow/network.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | slim = tf.contrib.slim
4 |
5 | DEFAULT_PADDING = 'SAME'
6 |
7 |
8 | def layer(op):
9 | '''Decorator for composable network layers.'''
10 |
11 | def layer_decorated(self, *args, **kwargs):
12 | # Automatically set a name if not provided.
13 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
14 | # Figure out the layer inputs.
15 | if len(self.terminals) == 0:
16 | raise RuntimeError('No input variables found for layer %s.' % name)
17 | elif len(self.terminals) == 1:
18 | layer_input = self.terminals[0]
19 | else:
20 | layer_input = list(self.terminals)
21 | # Perform the operation and get the output.
22 | layer_output = op(self, layer_input, *args, **kwargs)
23 | # Add to layer LUT.
24 | self.layers[name] = layer_output
25 | # This output is now the input for the next layer.
26 | self.feed(layer_output)
27 | # Return self for chained calls.
28 | return self
29 |
30 | return layer_decorated
31 |
32 |
33 | class Network(object):
34 |
35 | def __init__(self, inputs, trainable=True, is_training=False, num_classes=21):
36 | # The input nodes for this network
37 | self.inputs = inputs
38 | # The current list of terminal nodes
39 | self.terminals = []
40 | # Mapping from layer names to layers
41 | self.layers = dict(inputs)
42 | # If true, the resulting variables are set as trainable
43 | self.trainable = trainable
44 | # Switch variable for dropout
45 | self.use_dropout = tf.placeholder_with_default(tf.constant(1.0),
46 | shape=[],
47 | name='use_dropout')
48 | self.setup(is_training, num_classes)
49 |
50 | def setup(self, is_training, num_classes):
51 | '''Construct the network. '''
52 | raise NotImplementedError('Must be implemented by the subclass.')
53 |
54 | def load(self, data_path, session, ignore_missing=False):
55 | '''Load network weights.
56 | data_path: The path to the numpy-serialized network weights
57 | session: The current TensorFlow session
58 | ignore_missing: If true, serialized weights for missing layers are ignored.
59 | '''
60 | data_dict = np.load(data_path).item()
61 | for op_name in data_dict:
62 | with tf.variable_scope(op_name, reuse=True):
63 | for param_name, data in data_dict[op_name].iteritems():
64 | try:
65 | var = tf.get_variable(param_name)
66 | session.run(var.assign(data))
67 | except ValueError:
68 | if not ignore_missing:
69 | raise
70 |
71 | def feed(self, *args):
72 | '''Set the input(s) for the next operation by replacing the terminal nodes.
73 | The arguments can be either layer names or the actual layers.
74 | '''
75 | assert len(args) != 0
76 | self.terminals = []
77 | for fed_layer in args:
78 | if isinstance(fed_layer, basestring):
79 | try:
80 | fed_layer = self.layers[fed_layer]
81 | except KeyError:
82 | raise KeyError('Unknown layer name fed: %s' % fed_layer)
83 | self.terminals.append(fed_layer)
84 | return self
85 |
86 | def get_output(self):
87 | '''Returns the current network output.'''
88 | return self.terminals[-1]
89 |
90 | def get_unique_name(self, prefix):
91 | '''Returns an index-suffixed unique name for the given prefix.
92 | This is used for auto-generating layer names based on the type-prefix.
93 | '''
94 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
95 | return '%s_%d' % (prefix, ident)
96 |
97 | def make_var(self, name, shape):
98 | '''Creates a new TensorFlow variable.'''
99 | return tf.get_variable(name, shape, trainable=self.trainable)
100 |
101 | def validate_padding(self, padding):
102 | '''Verifies that the padding is one of the supported ones.'''
103 | assert padding in ('SAME', 'VALID')
104 |
105 | @layer
106 | def conv(self,
107 | input,
108 | k_h,
109 | k_w,
110 | c_o,
111 | s_h,
112 | s_w,
113 | name,
114 | relu=True,
115 | padding=DEFAULT_PADDING,
116 | group=1,
117 | biased=True):
118 | # Verify that the padding is acceptable
119 | self.validate_padding(padding)
120 | # Get the number of channels in the input
121 | c_i = input.get_shape()[-1]
122 | # Verify that the grouping parameter is valid
123 | assert c_i % group == 0
124 | assert c_o % group == 0
125 | # Convolution for a given input and kernel
126 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
127 | with tf.variable_scope(name) as scope:
128 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o])
129 | if group == 1:
130 | # This is the common-case. Convolve the input without any further complications.
131 | output = convolve(input, kernel)
132 | else:
133 | # Split the input into groups and then convolve each of them independently
134 | input_groups = tf.split(3, group, input)
135 | kernel_groups = tf.split(3, group, kernel)
136 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
137 | # Concatenate the groups
138 | output = tf.concat(3, output_groups)
139 | # Add the biases
140 | if biased:
141 | biases = self.make_var('biases', [c_o])
142 | output = tf.nn.bias_add(output, biases)
143 | if relu:
144 | # ReLU non-linearity
145 | output = tf.nn.relu(output, name=scope.name)
146 | return output
147 |
148 | @layer
149 | def atrous_conv(self,
150 | input,
151 | k_h,
152 | k_w,
153 | c_o,
154 | dilation,
155 | name,
156 | relu=True,
157 | padding=DEFAULT_PADDING,
158 | group=1,
159 | biased=True):
160 | # Verify that the padding is acceptable
161 | self.validate_padding(padding)
162 | # Get the number of channels in the input
163 | c_i = input.get_shape()[-1]
164 | # Verify that the grouping parameter is valid
165 | assert c_i % group == 0
166 | assert c_o % group == 0
167 | # Convolution for a given input and kernel
168 | convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding)
169 | with tf.variable_scope(name) as scope:
170 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o])
171 | if group == 1:
172 | # This is the common-case. Convolve the input without any further complications.
173 | output = convolve(input, kernel)
174 | else:
175 | # Split the input into groups and then convolve each of them independently
176 | input_groups = tf.split(3, group, input)
177 | kernel_groups = tf.split(3, group, kernel)
178 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
179 | # Concatenate the groups
180 | output = tf.concat(3, output_groups)
181 | # Add the biases
182 | if biased:
183 | biases = self.make_var('biases', [c_o])
184 | output = tf.nn.bias_add(output, biases)
185 | if relu:
186 | # ReLU non-linearity
187 | output = tf.nn.relu(output, name=scope.name)
188 | return output
189 |
190 | @layer
191 | def relu(self, input, name):
192 | return tf.nn.relu(input, name=name)
193 |
194 | @layer
195 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
196 | self.validate_padding(padding)
197 | return tf.nn.max_pool(input,
198 | ksize=[1, k_h, k_w, 1],
199 | strides=[1, s_h, s_w, 1],
200 | padding=padding,
201 | name=name)
202 |
203 | @layer
204 | def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
205 | self.validate_padding(padding)
206 | return tf.nn.avg_pool(input,
207 | ksize=[1, k_h, k_w, 1],
208 | strides=[1, s_h, s_w, 1],
209 | padding=padding,
210 | name=name)
211 |
212 | @layer
213 | def lrn(self, input, radius, alpha, beta, name, bias=1.0):
214 | return tf.nn.local_response_normalization(input,
215 | depth_radius=radius,
216 | alpha=alpha,
217 | beta=beta,
218 | bias=bias,
219 | name=name)
220 |
221 | @layer
222 | def concat(self, inputs, axis, name):
223 | return tf.concat(concat_dim=axis, values=inputs, name=name)
224 |
225 | @layer
226 | def add(self, inputs, name):
227 | return tf.add_n(inputs, name=name)
228 |
229 | @layer
230 | def fc(self, input, num_out, name, relu=True):
231 | with tf.variable_scope(name) as scope:
232 | input_shape = input.get_shape()
233 | if input_shape.ndims == 4:
234 | # The input is spatial. Vectorize it first.
235 | dim = 1
236 | for d in input_shape[1:].as_list():
237 | dim *= d
238 | feed_in = tf.reshape(input, [-1, dim])
239 | else:
240 | feed_in, dim = (input, input_shape[-1].value)
241 | weights = self.make_var('weights', shape=[dim, num_out])
242 | biases = self.make_var('biases', [num_out])
243 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
244 | fc = op(feed_in, weights, biases, name=scope.name)
245 | return fc
246 |
247 | @layer
248 | def softmax(self, input, name):
249 | input_shape = map(lambda v: v.value, input.get_shape())
250 | if len(input_shape) > 2:
251 | # For certain models (like NiN), the singleton spatial dimensions
252 | # need to be explicitly squeezed, since they're not broadcast-able
253 | # in TensorFlow's NHWC ordering (unlike Caffe's NCHW).
254 | if input_shape[1] == 1 and input_shape[2] == 1:
255 | input = tf.squeeze(input, squeeze_dims=[1, 2])
256 | else:
257 | raise ValueError('Rank 2 tensor input expected for softmax!')
258 | return tf.nn.softmax(input, name)
259 |
260 | @layer
261 | def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True):
262 | with tf.variable_scope(name) as scope:
263 | output = slim.batch_norm(
264 | input,
265 | activation_fn=activation_fn,
266 | is_training=is_training,
267 | updates_collections=None,
268 | scale=scale,
269 | scope=scope)
270 | return output
271 |
272 | @layer
273 | def dropout(self, input, keep_prob, name):
274 | keep = 1 - self.use_dropout + (self.use_dropout * keep_prob)
275 | return tf.nn.dropout(input, keep, name=name)
276 |
--------------------------------------------------------------------------------
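In both the conv and atrous_conv layers above, the group > 1 branch simply splits the input and the kernel along the channel axis, convolves each pair independently, and concatenates the per-group results; the atrous_conv variant only swaps the convolution primitive for tf.nn.atrous_conv2d with a dilation rate. A minimal NumPy sketch of that channel bookkeeping (hypothetical shapes, with a 1x1 kernel applied via einsum to keep it short):

import numpy as np

n, h, w, c_i, c_o, group = 2, 5, 5, 8, 16, 4
x = np.random.rand(n, h, w, c_i).astype(np.float32)
# One kernel per group, each mapping c_i // group input channels to c_o // group outputs.
kernels = [np.random.rand(c_i // group, c_o // group).astype(np.float32) for _ in range(group)]

input_groups = np.split(x, group, axis=3)            # analogous to tf.split(3, group, input)
output_groups = [np.einsum('nhwc,co->nhwo', xg, k)   # per-group 1x1 "convolution"
                 for xg, k in zip(input_groups, kernels)]
y = np.concatenate(output_groups, axis=3)            # analogous to tf.concat(3, output_groups)
assert y.shape == (n, h, w, c_o)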
/parsing_network/kaffe/graph.py:
--------------------------------------------------------------------------------
1 | from google.protobuf import text_format
2 |
3 | from .caffe import get_caffe_resolver
4 | from .errors import KaffeError, print_stderr
5 | from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch
6 | from .shapes import TensorShape
7 |
8 | class Node(object):
9 |
10 | def __init__(self, name, kind, layer=None):
11 | self.name = name
12 | self.kind = kind
13 | self.layer = LayerAdapter(layer, kind) if layer else None
14 | self.parents = []
15 | self.children = []
16 | self.data = None
17 | self.output_shape = None
18 | self.metadata = {}
19 |
20 | def add_parent(self, parent_node):
21 | assert parent_node not in self.parents
22 | self.parents.append(parent_node)
23 | if self not in parent_node.children:
24 | parent_node.children.append(self)
25 |
26 | def add_child(self, child_node):
27 | assert child_node not in self.children
28 | self.children.append(child_node)
29 | if self not in child_node.parents:
30 | child_node.parents.append(self)
31 |
32 | def get_only_parent(self):
33 | if len(self.parents) != 1:
34 | raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' %
35 | (self, len(self.parents)))
36 | return self.parents[0]
37 |
38 | @property
39 | def parameters(self):
40 | if self.layer is not None:
41 | return self.layer.parameters
42 | return None
43 |
44 | def __str__(self):
45 | return '[%s] %s' % (self.kind, self.name)
46 |
47 | def __repr__(self):
48 | return '%s (0x%x)' % (self.name, id(self))
49 |
50 |
51 | class Graph(object):
52 |
53 | def __init__(self, nodes=None, name=None):
54 | self.nodes = nodes or []
55 | self.node_lut = {node.name: node for node in self.nodes}
56 | self.name = name
57 |
58 | def add_node(self, node):
59 | self.nodes.append(node)
60 | self.node_lut[node.name] = node
61 |
62 | def get_node(self, name):
63 | try:
64 | return self.node_lut[name]
65 | except KeyError:
66 | raise KaffeError('Layer not found: %s' % name)
67 |
68 | def get_input_nodes(self):
69 | return [node for node in self.nodes if len(node.parents) == 0]
70 |
71 | def get_output_nodes(self):
72 | return [node for node in self.nodes if len(node.children) == 0]
73 |
74 | def topologically_sorted(self):
75 | sorted_nodes = []
76 | unsorted_nodes = list(self.nodes)
77 | temp_marked = set()
78 | perm_marked = set()
79 |
80 | def visit(node):
81 | if node in temp_marked:
82 | raise KaffeError('Graph is not a DAG.')
83 | if node in perm_marked:
84 | return
85 | temp_marked.add(node)
86 | for child in node.children:
87 | visit(child)
88 | perm_marked.add(node)
89 | temp_marked.remove(node)
90 | sorted_nodes.insert(0, node)
91 |
92 | while len(unsorted_nodes):
93 | visit(unsorted_nodes.pop())
94 | return sorted_nodes
95 |
96 | def compute_output_shapes(self):
97 | sorted_nodes = self.topologically_sorted()
98 | for node in sorted_nodes:
99 | node.output_shape = TensorShape(*NodeKind.compute_output_shape(node))
100 |
101 | def replaced(self, new_nodes):
102 | return Graph(nodes=new_nodes, name=self.name)
103 |
104 | def transformed(self, transformers):
105 | graph = self
106 | for transformer in transformers:
107 | graph = transformer(graph)
108 | if graph is None:
109 | raise KaffeError('Transformer failed: {}'.format(transformer))
110 | assert isinstance(graph, Graph)
111 | return graph
112 |
113 | def __contains__(self, key):
114 | return key in self.node_lut
115 |
116 | def __str__(self):
117 | hdr = '{:<20} {:<30} {:>20} {:>20}'.format('Type', 'Name', 'Param', 'Output')
118 | s = [hdr, '-' * 94]
119 | for node in self.topologically_sorted():
120 | # If the node has learned parameters, display the first one's shape.
121 | # In case of convolutions, this corresponds to the weights.
122 | data_shape = node.data[0].shape if node.data else '--'
123 | out_shape = node.output_shape or '--'
124 | s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape,
125 | tuple(out_shape)))
126 | return '\n'.join(s)
127 |
128 |
129 | class GraphBuilder(object):
130 | '''Constructs a model graph from a Caffe protocol buffer definition.'''
131 |
132 | def __init__(self, def_path, phase='test'):
133 | '''
134 | def_path: Path to the model definition (.prototxt)
135 | data_path: Path to the model data (.caffemodel)
136 | phase: Either 'test' or 'train'. Used for filtering phase-specific nodes.
137 | '''
138 | self.def_path = def_path
139 | self.phase = phase
140 | self.load()
141 |
142 | def load(self):
143 | '''Load the layer definitions from the prototxt.'''
144 | self.params = get_caffe_resolver().NetParameter()
145 | with open(self.def_path, 'rb') as def_file:
146 | text_format.Merge(def_file.read(), self.params)
147 |
148 | def filter_layers(self, layers):
149 | '''Filter out layers based on the current phase.'''
150 | phase_map = {0: 'train', 1: 'test'}
151 | filtered_layer_names = set()
152 | filtered_layers = []
153 | for layer in layers:
154 | phase = self.phase
155 | if len(layer.include):
156 | phase = phase_map[layer.include[0].phase]
157 | if len(layer.exclude):
158 | phase = phase_map[1 - layer.exclude[0].phase]
159 | exclude = (phase != self.phase)
160 | # Dropout layers appear in a fair number of Caffe
161 | # test-time networks. These are just ignored. We'll
162 | # filter them out here.
163 | if (not exclude) and (phase == 'test'):
164 | exclude = (layer.type == LayerType.Dropout)
165 | if not exclude:
166 | filtered_layers.append(layer)
167 | # Guard against dupes.
168 | assert layer.name not in filtered_layer_names
169 | filtered_layer_names.add(layer.name)
170 | return filtered_layers
171 |
172 | def make_node(self, layer):
173 | '''Create a graph node for the given layer.'''
174 | kind = NodeKind.map_raw_kind(layer.type)
175 | if kind is None:
176 | raise KaffeError('Unknown layer type encountered: %s' % layer.type)
177 | # We want to use the layer's top names (the "output" names), rather than the
178 | # name attribute, which is more of a readability thing than a functional one.
179 | # Other layers will refer to a node by its "top name".
180 | return Node(layer.name, kind, layer=layer)
181 |
182 | def make_input_nodes(self):
183 | '''
184 | Create data input nodes.
185 |
186 | This method is for old-style inputs, where the input specification
187 | was not treated as a first-class layer in the prototxt.
188 | Newer models use the "Input layer" type.
189 | '''
190 | nodes = [Node(name, NodeKind.Data) for name in self.params.input]
191 | if len(nodes):
192 | input_dim = map(int, self.params.input_dim)
193 | if not input_dim:
194 | if len(self.params.input_shape) > 0:
195 | input_dim = map(int, self.params.input_shape[0].dim)
196 | else:
197 | raise KaffeError('Dimensions for input not specified.')
198 | for node in nodes:
199 | node.output_shape = tuple(input_dim)
200 | return nodes
201 |
202 | def build(self):
203 | '''
204 | Builds the graph from the Caffe layer definitions.
205 | '''
206 | # Get the layers
207 | layers = self.params.layers or self.params.layer
208 | # Filter out phase-excluded layers
209 | layers = self.filter_layers(layers)
210 | # Get any separately-specified input layers
211 | nodes = self.make_input_nodes()
212 | nodes += [self.make_node(layer) for layer in layers]
213 | # Initialize the graph
214 | graph = Graph(nodes=nodes, name=self.params.name)
215 | # Connect the nodes
216 | #
217 | # A note on layers and outputs:
218 | # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs
219 | # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom
220 | # (in case of in-place operations). Note that the layer's name is not used for establishing
221 | # any connectivity. It's only used for data association. By convention, a layer with a
222 | # single top will often use the same name (although this is not required).
223 | #
224 | # The current implementation only supports single-output nodes (note that a node can still
225 | # have multiple children, since multiple child nodes can refer to the single top's name).
226 | node_outputs = {}
227 | for layer in layers:
228 | node = graph.get_node(layer.name)
229 | for input_name in layer.bottom:
230 | assert input_name != layer.name
231 | parent_node = node_outputs.get(input_name)
232 | if (parent_node is None) or (parent_node == node):
233 | parent_node = graph.get_node(input_name)
234 | node.add_parent(parent_node)
235 | if len(layer.top)>1:
236 | raise KaffeError('Multiple top nodes are not supported.')
237 | for output_name in layer.top:
238 | if output_name == layer.name:
239 | # Output is named the same as the node. No further action required.
240 | continue
241 | # There are two possibilities here:
242 | #
243 | # Case 1: output_name refers to another node in the graph.
244 | # This is an "in-place operation" that overwrites an existing node.
245 | # This would create a cycle in the graph. We'll undo the in-placing
246 | # by substituting this node wherever the overwritten node is referenced.
247 | #
248 | # Case 2: output_name violates the convention layer.name == output_name.
249 | # Since we are working in the single-output regime, we can rename it to
250 | # match the layer name.
251 | #
252 | # In both cases, future references to this top are re-routed to this node.
253 | node_outputs[output_name] = node
254 |
255 | graph.compute_output_shapes()
256 | return graph
257 |
258 |
259 | class NodeMapper(NodeDispatch):
260 |
261 | def __init__(self, graph):
262 | self.graph = graph
263 |
264 | def map(self):
265 | nodes = self.graph.topologically_sorted()
266 | # Remove input nodes - we'll handle them separately.
267 | input_nodes = self.graph.get_input_nodes()
268 | nodes = [t for t in nodes if t not in input_nodes]
269 | # Decompose DAG into chains.
270 | chains = []
271 | for node in nodes:
272 | attach_to_chain = None
273 | if len(node.parents) == 1:
274 | parent = node.get_only_parent()
275 | for chain in chains:
276 | if chain[-1] == parent:
277 | # Node is part of an existing chain.
278 | attach_to_chain = chain
279 | break
280 | if attach_to_chain is None:
281 | # Start a new chain for this node.
282 | attach_to_chain = []
283 | chains.append(attach_to_chain)
284 | attach_to_chain.append(node)
285 | # Map each chain.
286 | mapped_chains = []
287 | for chain in chains:
288 | mapped_chains.append(self.map_chain(chain))
289 | return self.commit(mapped_chains)
290 |
291 | def map_chain(self, chain):
292 | return [self.map_node(node) for node in chain]
293 |
294 | def map_node(self, node):
295 | map_func = self.get_handler(node.kind, 'map')
296 | mapped_node = map_func(node)
297 | assert mapped_node is not None
298 | mapped_node.node = node
299 | return mapped_node
300 |
301 | def commit(self, mapped_chains):
302 | raise NotImplementedError('Must be implemented by subclass.')
303 |
--------------------------------------------------------------------------------
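Graph.topologically_sorted above is a depth-first topological sort with temporary/permanent marks that raises on cycles. The same scheme, reduced to a plain children dictionary with hypothetical node names, can be sketched as:

# Standalone sketch of the DFS-based topological sort used by Graph.topologically_sorted.
def topo_sort(children):
    order, temp, perm = [], set(), set()

    def visit(node):
        if node in temp:
            raise ValueError('Graph is not a DAG.')
        if node in perm:
            return
        temp.add(node)
        for child in children.get(node, []):
            visit(child)
        perm.add(node)
        temp.remove(node)
        order.insert(0, node)

    for node in list(children):
        visit(node)
    return order

# 'data' -> 'conv1' -> 'relu1' -> 'fc'
print(topo_sort({'data': ['conv1'], 'conv1': ['relu1'], 'relu1': ['fc'], 'fc': []}))
# prints ['data', 'conv1', 'relu1', 'fc']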
/parsing_network/train.py:
--------------------------------------------------------------------------------
1 | """Training script for the DeepLab-ResNet network on the PASCAL VOC dataset
2 | for semantic image segmentation.
3 |
4 | This script trains the model using augmented PASCAL VOC,
5 | which contains approximately 10000 images for training and 1500 images for validation.
6 | """
7 |
8 | from __future__ import print_function
9 |
10 | import argparse
11 | from datetime import datetime
12 | import os
13 | import sys
14 | import time
15 |
16 | import tensorflow as tf
17 | import numpy as np
18 |
19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, inv_preprocess, prepare_label
20 |
21 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
22 |
23 | BATCH_SIZE = 8
24 | DATA_DIRECTORY = '/home/VOCdevkit'
25 | DATA_LIST_PATH = './dataset/pascal_train.txt'
26 | IGNORE_LABEL = 255
27 | INPUT_SIZE = '321,321'
28 | LEARNING_RATE = 2.5e-4
29 | MOMENTUM = 0.9
30 | NUM_CLASSES = 7
31 | NUM_EPOCHS = 90
32 | POWER = 0.9
33 | RANDOM_SEED = 1435
34 | RESTORE_FROM = './deeplab_resnet.ckpt'
35 | SAVE_NUM_IMAGES = 2
36 | SAVE_PRED_EVERY = 10000
37 | SNAPSHOT_DIR = './snapshots/'
38 | WEIGHT_DECAY = 0.0005
39 |
40 |
41 | def get_arguments():
42 | """Parse all the arguments provided from the CLI.
43 |
44 | Returns:
45 | A namespace of parsed arguments.
46 | """
47 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network")
48 | parser.add_argument("--batch-size", type=int, default=BATCH_SIZE,
49 | help="Number of images sent to the network in one step.")
50 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY,
51 | help="Path to the directory containing the PASCAL VOC dataset.")
52 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH,
53 | help="Path to the file listing the images in the dataset.")
54 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL,
55 | help="The index of the label to ignore during the training.")
56 | parser.add_argument("--input-size", type=str, default=INPUT_SIZE,
57 | help="Comma-separated string with height and width of images.")
58 | parser.add_argument("--is-training", action="store_true",
59 | help="Whether to update the running means and variances during training.")
60 | parser.add_argument("--learning-rate", type=float, default=LEARNING_RATE,
61 | help="Base learning rate for training with polynomial decay.")
62 | parser.add_argument("--momentum", type=float, default=MOMENTUM,
63 | help="Momentum component of the optimiser.")
64 | parser.add_argument("--not-restore-last", action="store_true",
65 | help="Whether to skip restoring the last (FC) layers.")
66 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
67 | help="Number of classes to predict (including background).")
68 | parser.add_argument("--num-epochs", type=int, default=NUM_EPOCHS,
69 | help="Number of training epochs.")
70 | parser.add_argument("--power", type=float, default=POWER,
71 | help="Decay parameter to compute the learning rate.")
72 | parser.add_argument("--random-mirror", action="store_true",
73 | help="Whether to randomly mirror the inputs during the training.")
74 | parser.add_argument("--random-scale", action="store_true",
75 | help="Whether to randomly scale the inputs during the training.")
76 | parser.add_argument("--random-seed", type=int, default=RANDOM_SEED,
77 | help="Random seed to have reproducible results.")
78 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM,
79 | help="Where to restore model parameters from.")
80 | parser.add_argument("--save-num-images", type=int, default=SAVE_NUM_IMAGES,
81 | help="How many images to save.")
82 | parser.add_argument("--save-pred-every", type=int, default=SAVE_PRED_EVERY,
83 | help="How often (in steps) to save summaries and checkpoints.")
84 | parser.add_argument("--snapshot-dir", type=str, default=SNAPSHOT_DIR,
85 | help="Where to save snapshots of the model.")
86 | parser.add_argument("--weight-decay", type=float, default=WEIGHT_DECAY,
87 | help="Regularisation parameter for L2-loss.")
88 | return parser.parse_args()
89 |
90 | def save(saver, sess, logdir, step):
91 | '''Save weights.
92 |
93 | Args:
94 | saver: TensorFlow Saver object.
95 | sess: TensorFlow session.
96 | logdir: path to the snapshots directory.
97 | step: current training step.
98 | '''
99 | model_name = 'model.ckpt'
100 | checkpoint_path = os.path.join(logdir, model_name)
101 |
102 | if not os.path.exists(logdir):
103 | os.makedirs(logdir)
104 | saver.save(sess, checkpoint_path, global_step=step)
105 | print('The checkpoint has been created.')
106 |
107 | def load(saver, sess, ckpt_path):
108 | '''Load trained weights.
109 |
110 | Args:
111 | saver: TensorFlow Saver object.
112 | sess: TensorFlow session.
113 | ckpt_path: path to checkpoint file with parameters.
114 | '''
115 | saver.restore(sess, ckpt_path)
116 | print("Restored model parameters from {}".format(ckpt_path))
117 |
118 | def file_len(fname):
119 | with open(fname) as f:
120 | for i, l in enumerate(f):
121 | pass
122 | return i + 1
123 |
124 | def main():
125 | """Create the model and start the training."""
126 | args = get_arguments()
127 |
128 | h, w = map(int, args.input_size.split(','))
129 | input_size = (h, w)
130 | num_steps = int(file_len(args.data_list) * args.num_epochs / args.batch_size)
131 | print('Total number of steps is '+str(num_steps))
132 | tf.set_random_seed(args.random_seed)
133 |
134 | # Create queue coordinator.
135 | coord = tf.train.Coordinator()
136 |
137 | # Load reader.
138 | with tf.name_scope("create_inputs"):
139 | reader = ImageReader(
140 | args.data_dir,
141 | args.data_list,
142 | input_size,
143 | args.random_scale,
144 | args.random_mirror,
145 | args.ignore_label,
146 | IMG_MEAN,
147 | coord)
148 | image_batch, label_batch = reader.dequeue(args.batch_size)
149 | print(args.random_scale,
150 | args.random_mirror)
151 | # Create network.
152 | net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)
153 | # For a small batch size, it is better to keep
154 | # the statistics of the BN layers (running means and variances)
155 | # frozen, and to not update the values provided by the pre-trained model.
156 | # If is_training=True, the statistics will be updated during the training.
157 | # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
158 | # if they are present in the var_list of the optimiser definition.
159 |
160 | # Predictions.
161 | raw_output = net.layers['fc1_voc12']
162 | # Which variables to load. Running means and variances are not trainable,
163 | # thus all_variables() should be restored.
164 | restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
165 | all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
166 | fc_trainable = [v for v in all_trainable if 'fc' in v.name]
167 | conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
168 | fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0
169 | fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0
170 | assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
171 | assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))
172 |
173 |
174 | # Predictions: ignore all predictions with labels greater than or equal to num_classes
175 | raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
176 | label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w]
177 | raw_gt = tf.reshape(label_proc, [-1,])
178 | indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
179 | gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
180 | prediction = tf.gather(raw_prediction, indices)
181 |
182 |
183 | # Pixel-wise softmax loss.
184 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
185 | l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
186 | reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
187 |
188 | # Processed predictions: for visualisation.
189 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
190 | raw_output_up = tf.argmax(raw_output_up, dimension=3)
191 | pred = tf.expand_dims(raw_output_up, dim=3)
192 |
193 | # Image summary.
194 | images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
195 | labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
196 | preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
197 |
198 | if not os.path.exists(args.snapshot_dir):
199 | os.mkdir(args.snapshot_dir)
200 |
201 | total_summary = tf.summary.image('images',
202 | tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
203 | max_outputs=args.save_num_images) # Concatenate row-wise.
204 | summary_writer = tf.summary.FileWriter(args.snapshot_dir,
205 | graph=tf.get_default_graph())
206 |
207 | # Define loss and optimisation parameters.
208 | base_lr = tf.constant(args.learning_rate)
209 | step_ph = tf.placeholder(dtype=tf.float32, shape=())
210 | learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / num_steps), args.power))
211 |
212 | opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
213 | opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
214 | opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)
215 |
216 | grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
217 | grads_conv = grads[:len(conv_trainable)]
218 | grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
219 | grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
220 |
221 | train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
222 | train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
223 | train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
224 |
225 | train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
226 |
227 |
228 | # Set up tf session and initialize variables.
229 | config = tf.ConfigProto()
230 | config.gpu_options.allow_growth = True
231 | sess = tf.Session(config=config)
232 | init = tf.global_variables_initializer()
233 |
234 | sess.run(init)
235 |
236 | # Saver for storing checkpoints of the model.
237 | saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)
238 |
239 | # Load variables if the checkpoint is provided.
240 | if args.restore_from is not None:
241 | loader = tf.train.Saver(var_list=restore_var)
242 | load(loader, sess, args.restore_from)
243 |
244 | # Start queue threads.
245 | threads = tf.train.start_queue_runners(coord=coord, sess=sess)
246 |
247 | # Iterate over training steps.
248 | for step in range(num_steps):
249 | start_time = time.time()
250 | feed_dict = { step_ph : step }
251 |
252 | if step % args.save_pred_every == 0 or step == num_steps-1:
253 | loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, train_op], feed_dict=feed_dict)
254 | summary_writer.add_summary(summary, step)
255 | save(saver, sess, args.snapshot_dir, step)
256 | else:
257 | loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
258 | duration = time.time() - start_time
259 | print('step {:d} \t loss = {:.3f} , ({:.3f} sec/step)'.format(step, loss_value, duration))
260 | coord.request_stop()
261 | coord.join(threads)
262 |
263 | if __name__ == '__main__':
264 | main()
265 |
--------------------------------------------------------------------------------
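The optimiser setup in train.py uses a polynomial learning-rate decay, lr(step) = base_lr * (1 - step / num_steps) ** power, and applies it with 1x, 10x and 20x multipliers to the conv weights, FC weights and FC biases respectively. A quick standalone check of the base schedule (num_steps is a hypothetical value here; the script derives it from the image-list length, the number of epochs and the batch size):

# Polynomial decay as used above: lr(step) = BASE_LR * (1 - step / num_steps) ** POWER
BASE_LR, POWER = 2.5e-4, 0.9
num_steps = 100000  # hypothetical value for illustration

for step in (0, 25000, 50000, 75000, 99999):
    lr = BASE_LR * (1.0 - float(step) / num_steps) ** POWER
    print('step %6d  lr %.3e' % (step, lr))  # decays monotonically from BASE_LR towards 0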
/data_generation/generate_prior_util.py:
--------------------------------------------------------------------------------
1 | '''
2 | Variables and functions for prior generation.
3 | The order of keypoints is:
4 | 0-'right ankle' 1-'right knee' 2-'right hip' 3-'left hip' 4-'left knee' 5-'left ankle' 6-'pelvis' 7-'thorax' 8-'neck'
5 | 9-'head' 10-'right wrist' 11-'right elbow' 12-'right shoulder' 13-'left shoulder' 14-'left elbow' 15-'left wrist'.
6 | When the pelvis is missing, we use the midpoint of the two hips instead.
7 | The thorax is unused and is set to (0, 0).
8 | '''
9 |
10 | import numpy as np
11 | import os
12 | import copy
13 | import cv2
14 | import random
15 |
16 |
17 | '''
18 | Used for aligning torso.
19 | '''
20 | RIGHT_LEG = [6, 2, 1, 0]
21 | LEFT_LEG = [6, 3, 4, 5]
22 | RIGHT_ARM = [6, 8, 12, 11, 10]
23 | LEFT_ARM = [6, 8, 13, 14, 15]
24 | HEAD = [6, 8, 9]
25 | SPINE = [6, 8]
26 | RIGHT_ARM_SPINE = [8, 12, 11, 10]
27 | LEFT_ARM_SPINE = [8, 13, 14, 15]
28 | HEAD_SPINE = [8, 9]
29 |
30 | '''
31 | Colors for each part of body.
32 | '''
33 | body_part_color = np.array([
34 | [0, 0, 0],
35 | [128, 0, 0],
36 | [0, 128, 0],
37 | [128, 128, 0],
38 | [0, 0, 128],
39 | [128, 0, 128],
40 | [0, 128, 128],
41 | [128, 128, 128],
42 | [64, 0, 0],
43 | [192, 0, 0],
44 | [64, 128, 0]],
45 | dtype=np.uint8)
46 |
47 | '''
48 | Merge labels 1 - 10 into labels 1 - 6,
49 | e.g. left and right upper arm ==> upper arm.
50 | '''
51 | merge_mask = np.array([0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6], dtype=np.uint8)
52 |
53 | '''
54 | Morphing is done for each body part associated with the following skeleton lines, and the results are then merged together.
55 | '''
56 | main_skeleton_lines = [
57 | [],
58 | [8, 9],
59 | [6, 8],
60 | [13, 14],
61 | [12, 11],
62 | [14, 15],
63 | [11, 10],
64 | [3, 4],
65 | [2, 1],
66 | [4, 5],
67 | [1, 0]]
68 |
69 | '''
70 | Lines of skeleton.
71 | '''
72 | skeletonLines = [
73 | [0, 1],
74 | [1, 2],
75 | [2, 6],
76 | [6, 3],
77 | [3, 4],
78 | [4, 5],
79 | [6, 8],
80 | [8, 9],
81 | [8, 12],
82 | [12, 11],
83 | [11, 10],
84 | [8, 13],
85 | [13, 14],
86 | [14, 15]
87 | ]
88 |
89 | '''
90 | Dictionary of (color name : BGR value) pairs, in OpenCV channel order.
91 | '''
92 | colorDict = {
93 | "purple": [255, 0, 128],
94 | "orange": [1, 96, 254],
95 | "light_blue": [255, 141, 28],
96 | "dark_blue": [232, 0, 0],
97 | "red": [0, 0, 255]
98 | }
99 |
100 | '''
101 | Colors for each skeleton line.
102 | '''
103 | skeletonColor = ["orange", "orange", "orange", "light_blue", "light_blue", "light_blue",
104 | "purple", "purple", "red", "red", "red", "dark_blue", "dark_blue", "dark_blue"
105 | ]
106 |
107 |
108 | def drawSkeleton(img, pose):
109 | '''
110 | Given an image and a pose, draw the skeleton on that image.
111 |
112 | :param img:
113 | Image to draw skeleton on.
114 | :param pose:
115 | Pose of shape (1 x 32).
116 | :return:
117 | Image with skeleton lines.
118 | '''
119 | retImg = copy.deepcopy(img)
120 | pose = pose[0]
121 | for i in range(len(skeletonLines)):
122 | a = skeletonLines[i][0]
123 | b = skeletonLines[i][1]
124 | cv2.line(retImg, (int(pose[a*2]), int(pose[a*2+1])), (int(pose[b*2]), int(pose[b*2+1])), list(map(lambda i: i*0.6, colorDict[skeletonColor[i]])), 3)
125 | return retImg
126 |
127 |
128 | def align_torso(poses):
129 | '''
130 | Normalize the torso (pelvis-to-neck) length of every pose to a common value (50).
131 |
132 | :param poses:
133 | 2-dimensional array of shape N x 32.
134 | :return:
135 | Aligned pose array.
136 | '''
137 | poses_new = copy.deepcopy(poses)
138 | for i in range(1, 4):
139 | poses_new[:, (2 * RIGHT_LEG[i]):(2 * RIGHT_LEG[i] + 2)] = poses[:, (2 * RIGHT_LEG[i]):(2 * RIGHT_LEG[i] + 2)] - poses[:, (2 * RIGHT_LEG[i - 1]):(2 * RIGHT_LEG[i - 1] + 2)]
140 | poses_new[:, 2 * LEFT_LEG[i]:2 * LEFT_LEG[i] + 2] = poses[:, 2 * LEFT_LEG[i]:2 * LEFT_LEG[i] + 2] - poses[:, 2 * LEFT_LEG[i - 1]:2 * LEFT_LEG[i - 1] + 2]
141 | for i in range(1, 5):
142 | poses_new[:, 2 * RIGHT_ARM[i]:2 * RIGHT_ARM[i] + 2] = poses[:, 2 * RIGHT_ARM[i]:2 * RIGHT_ARM[i] + 2] - poses[:, 2 * RIGHT_ARM[i - 1]:2 * RIGHT_ARM[i - 1] + 2]
143 | poses_new[:, 2 * LEFT_ARM[i]:2 * LEFT_ARM[i] + 2] = poses[:, 2 * LEFT_ARM[i]:2 * LEFT_ARM[i] + 2] - poses[:, 2 * LEFT_ARM[i - 1]:2 * LEFT_ARM[i - 1] + 2]
144 | for i in range(1, 3):
145 | poses_new[:, 2 * HEAD[i]:2 * HEAD[i] + 2] = poses[:, 2 * HEAD[i]:2 * HEAD[i] + 2] - poses[:, 2 * HEAD[i - 1]:2 * HEAD[i - 1] + 2]
146 |
147 | ratio = 50 / np.sqrt(np.square(poses_new[:, 16:17]) + np.square(poses_new[:, 17:18]))
148 | poses_ret = poses_new * np.tile(ratio[:, 0:1], [1, 32])
149 |
150 | for i in range(1, 4):
151 | poses_ret[:, (2 * RIGHT_LEG[i]):(2 * RIGHT_LEG[i] + 2)] = poses_ret[:, (2 * RIGHT_LEG[i]):(2 * RIGHT_LEG[i] + 2)] + poses_ret[:, (2 * RIGHT_LEG[i - 1]):(2 * RIGHT_LEG[i - 1] + 2)]
152 | poses_ret[:, 2 * LEFT_LEG[i]:2 * LEFT_LEG[i] + 2] = poses_ret[:, 2 * LEFT_LEG[i]:2 * LEFT_LEG[i] + 2] + poses_ret[:, 2 * LEFT_LEG[i - 1]:2 * LEFT_LEG[i - 1] + 2]
153 | for i in range(1, 2):
154 | poses_ret[:, (2 * SPINE[i]):(2 * SPINE[i] + 2)] = poses_ret[:, (2 * SPINE[i]):(2 * SPINE[i] + 2)] + poses_ret[:, (2 * SPINE[i - 1]):(2 * SPINE[i - 1] + 2)]
155 | for i in range(1, 4):
156 | poses_ret[:, 2 * RIGHT_ARM_SPINE[i]:2 * RIGHT_ARM_SPINE[i] + 2] = poses_ret[:, 2 * RIGHT_ARM_SPINE[i]:2 * RIGHT_ARM_SPINE[i] + 2] + poses_ret[:, 2 * RIGHT_ARM_SPINE[i - 1]:2 * RIGHT_ARM_SPINE[i - 1] + 2]
157 | poses_ret[:, 2 * LEFT_ARM_SPINE[i]:2 * LEFT_ARM_SPINE[i] + 2] = poses_ret[:, 2 * LEFT_ARM_SPINE[i]:2 * LEFT_ARM_SPINE[i] + 2] + poses_ret[:, 2 * LEFT_ARM_SPINE[i - 1]:2 * LEFT_ARM_SPINE[i - 1] + 2]
158 | for i in range(1, 2):
159 | poses_ret[:, 2 * HEAD_SPINE[i]:2 * HEAD_SPINE[i] + 2] = poses_ret[:, 2 * HEAD_SPINE[i]:2 * HEAD_SPINE[i] + 2] + poses_ret[:, 2 * HEAD_SPINE[i - 1]:2 * HEAD_SPINE[i - 1] + 2]
160 |
161 | center = np.array((250, 250), dtype=float)
162 | centers = np.tile(center, (poses.shape[0], 1))
163 | centers = centers - poses_ret[:, 12:14]
164 | centers = np.tile(centers, (1, 16))
165 | poses_ret = poses_ret + centers
166 |
167 | return poses_ret
168 |
169 |
170 | def load_pascal_pose(pascal_pose_file_root):
171 | '''
172 | Load preprocessed Pascal pose file.
173 |
174 | :param pascal_pose_file_root:
175 | Root of pascal pose file.
176 | :return:
177 | pose_arr: 2-dimensional array of aligned poses of shape N x 32, with the thorax set to (0, 0).
178 | img_names: list of image names.
179 | pose_dict: dictionary of (image name : unaligned pose[1 x 32]) pairs.
180 | '''
181 | pascal_pose_file = open(pascal_pose_file_root, "r")
182 |
183 | pose_list = []
184 | img_names = []
185 | pose_dict = {}
186 |
187 | line_count = 0
188 | while True:
189 | line = pascal_pose_file.readline()
190 | if not line:
191 | break
192 | words = line.split(",")
193 | img_name = words[0]
194 | pose_tmp = np.zeros((1, 32), dtype=float)
195 | for i in range(16):
196 | x = words[1 + 3 * i]
197 | y = words[1 + 3 * i + 1]
198 | # words[1 + 3 * i + 2] is_visible (not used)
199 |
200 | pose_tmp[0][2 * i] = float(x)
201 | pose_tmp[0][2 * i + 1] = float(y)
202 |
203 | has_negative = False
204 | for i in range(16):
205 | if pose_tmp[0][2 * i] < 0 or pose_tmp[0][2 * i + 1] < 0:
206 | has_negative = True
207 | break
208 | if has_negative:
209 | continue
210 | pose_tmp[0][12] = (pose_tmp[0][4] + pose_tmp[0][6]) / 2
211 | pose_tmp[0][13] = (pose_tmp[0][5] + pose_tmp[0][7]) / 2
212 | pose_list.append(pose_tmp)
213 | img_names.append(img_name[0:len(img_name) - 4])
214 | pose_dict[img_name[0:len(img_name) - 4]] = pose_tmp
215 |
216 | line_count += 1
217 |
218 | pascal_pose_file.close()
219 |
220 | pose_arr = np.zeros((len(pose_list), 32), dtype=float)
221 | for i in range(len(pose_list)):
222 | pose_arr[i] = pose_list[i]
223 |
224 | pose_arr = align_torso(pose_arr)
225 | pose_arr[:, 14:16] = np.tile(np.zeros((1, 2), dtype=float), (len(pose_list), 1))
226 | return pose_arr, img_names, pose_dict
227 |
228 |
229 | def paint(mask_img, merge):
230 | '''
231 | Paint a parsing result (mask) as a color image.
232 |
233 | :param mask_img:
234 | 1-channel parsing result.
235 | :param merge:
236 | 0 or 1. If 1, merge 10 parts into 6 parts.
237 | :return:
238 | Corresponding color image.
239 | '''
240 | assert (len(mask_img.shape) == 2)
241 | if merge:
242 | return body_part_color[merge_mask[mask_img]]
243 | else:
244 | return body_part_color[mask_img]
245 |
246 |
247 | def morphing(origin_mask_img, origin_pose, target_pose, target_size): # target_size [width, height]
248 | '''
249 | Morph the origin mask image so that its pose matches the target pose.
250 |
251 | :param origin_mask_img:
252 | Origin mask image, 1-channel, with labels 0-10 (0 for background).
253 | :param origin_pose:
254 | 1-dimension pose array, of shape (32, ).
255 | :param target_pose:
256 | 1-dimension pose array, of shape (32, ).
257 | :param target_size:
258 | Target image size: [width, height].
259 | :return:
260 | Color image of morphed mask image, of size target_size.
261 | '''
262 | assert (len(origin_mask_img.shape) == 2)
263 | assert (len(origin_pose.shape) == 1)
264 | assert (len(target_pose.shape) == 1)
265 |
266 | target_mask_img = np.zeros((target_size[1], target_size[0]), dtype=np.uint8)
267 | # morphing for each part
268 | for label in range(1, 11):
269 | origin_size = np.array([origin_mask_img.shape[1], origin_mask_img.shape[0]], dtype=int)
270 | origin_body_part = origin_mask_img * (origin_mask_img == label)
271 | a = main_skeleton_lines[label][0]
272 | b = main_skeleton_lines[label][1]
273 | origin_pose_part_a = np.array([origin_pose[a * 2], origin_pose[a * 2 + 1]], dtype=float)
274 | origin_pose_part_b = np.array([origin_pose[b * 2], origin_pose[b * 2 + 1]], dtype=float)
275 | origin_pose_part_tensor = origin_pose_part_b - origin_pose_part_a
276 | target_pose_part_a = np.array([target_pose[a * 2], target_pose[a * 2 + 1]], dtype=float)
277 | target_pose_part_b = np.array([target_pose[b * 2], target_pose[b * 2 + 1]], dtype=float)
278 | target_pose_part_tensor = target_pose_part_b - target_pose_part_a
279 | origin_pose_part_length = np.sqrt(np.sum(np.square(origin_pose_part_tensor)))
280 | target_pose_part_length = np.sqrt(np.sum(np.square(target_pose_part_tensor)))
281 | # scaling ratio
282 | scale_factor = target_pose_part_length / origin_pose_part_length
283 | if scale_factor == 0:
284 | continue
285 | # rotating angle
286 | theta = - (np.arctan2(target_pose_part_tensor[1], target_pose_part_tensor[0]) - np.arctan2(
287 | origin_pose_part_tensor[1], origin_pose_part_tensor[0])) * 180 / np.pi
288 |
289 | ''' scale '''
290 | origin_size[0] *= scale_factor
291 | origin_size[1] *= scale_factor
292 | origin_pose_part_a *= scale_factor
293 | origin_pose_part_b *= scale_factor
294 | origin_body_part = cv2.resize(origin_body_part, (origin_size[0], origin_size[1]),
295 | interpolation=cv2.INTER_NEAREST)
296 | # print("finish scale", label)
297 |
298 | ''' translate to the center in case rotation out of the image '''
299 | origin_pose_part_center = (origin_pose_part_a + origin_pose_part_b) / 2
300 | origin_center = origin_size / 2
301 | tx = origin_center[0] - int(origin_pose_part_center[0])
302 | ty = origin_center[1] - int(origin_pose_part_center[1])
303 | tm = np.float32([[1, 0, tx], [0, 1, ty]])
304 | origin_body_part = cv2.warpAffine(origin_body_part, tm, (origin_size[0], origin_size[1]))
305 | # print("finish translate", label)
306 |
307 | ''' rotate '''
308 | rm = cv2.getRotationMatrix2D((origin_center[0], origin_center[1]), theta, 1)
309 | origin_body_part = cv2.warpAffine(origin_body_part, rm, (origin_size[0], origin_size[1]))
310 | origin_body_part = (origin_body_part != 0) * label
311 | # print("finish rotate", label)
312 |
313 | ''' crop and paste '''
314 | target_pose_part_center = (target_pose_part_a + target_pose_part_b) / 2
315 | target_pose_part_center[0] = int(target_pose_part_center[0])
316 | target_pose_part_center[1] = int(target_pose_part_center[1])
317 | if target_pose_part_center[1] >= origin_center[1]:
318 | origin_row_low = 0
319 | target_row_low = target_pose_part_center[1] - origin_center[1]
320 | else:
321 | origin_row_low = origin_center[1] - target_pose_part_center[1]
322 | target_row_low = 0
323 | if (target_size[1] - target_pose_part_center[1]) >= (origin_size[1] - origin_center[1]):
324 | origin_row_high = origin_size[1]
325 | target_row_high = target_pose_part_center[1] + origin_size[1] - origin_center[1]
326 | else:
327 | origin_row_high = origin_center[1] + target_size[1] - target_pose_part_center[1]
328 | target_row_high = target_size[1]
329 | if target_pose_part_center[0] >= origin_center[0]:
330 | origin_col_low = 0
331 | target_col_low = target_pose_part_center[0] - origin_center[0]
332 | else:
333 | origin_col_low = origin_center[0] - target_pose_part_center[0]
334 | target_col_low = 0
335 | if (target_size[0] - target_pose_part_center[0]) >= (origin_size[0] - origin_center[0]):
336 | origin_col_high = origin_size[0]
337 | target_col_high = target_pose_part_center[0] + origin_size[0] - origin_center[0]
338 | else:
339 | origin_col_high = origin_center[0] + target_size[0] - target_pose_part_center[0]
340 | target_col_high = target_size[0]
341 | origin_row_low = int(origin_row_low)
342 | target_row_low = int(target_row_low)
343 | origin_row_high = int(origin_row_high)
344 | target_row_high = int(target_row_high)
345 | origin_col_low = int(origin_col_low)
346 | target_col_low = int(target_col_low)
347 | origin_col_high = int(origin_col_high)
348 | target_col_high = int(target_col_high)
349 | target_mask_img[target_row_low:target_row_high, target_col_low:target_col_high] = np.maximum(
350 | target_mask_img[target_row_low:target_row_high, target_col_low:target_col_high],
351 | origin_body_part[origin_row_low:origin_row_high, origin_col_low:origin_col_high])
352 | # print("finish crop and paste", label)
353 |
354 | return paint(target_mask_img, merge=True)
355 |
356 |
357 | def random_k_within_n(n, k):
358 | '''
359 | Get k random numbers in [0, n).
360 |
361 | :param n:
362 | :param k:
363 | :return:
364 | '''
365 | arr = np.arange(n)
366 | random.shuffle(arr)
367 | return arr[0:k]
368 |
369 |
370 | def find_nearest_pose(aligned_pose, pascal_poses):
371 | '''
372 | Given an aligned pose, order the aligned PASCAL pose array by distance and return the index array.
373 |
374 | :param aligned_pose:
375 | Aligned pose (thorax set to (0, 0)), 1-d (32,) or 2-d (1, 32).
376 | :param pascal_poses:
377 | Aligned PASCAL poses, of shape (N x 32).
378 | :return:
379 | Ordered index array.
380 | '''
381 | pose_tmp = np.tile(aligned_pose, (pascal_poses.shape[0], 1))
382 | pose_pascal_distance = np.sum(np.square(pose_tmp - pascal_poses), axis=1)
383 | distance_index = np.argsort(pose_pascal_distance)
384 | return distance_index
385 |
386 |
387 | def generate_prior_single_person(bbox, raw_pose, PASCALMaskImgDir, pascal_poses, pascal_img_names, pascal_pose_dict, n, k, exclude_self=False, save_dir=None):
388 | '''
389 | Generate prior for a single person.
390 |
391 | :param bbox:
392 | Bounding box of the person (can be the full image size).
393 | The pose will be aligned according to the upper left coordinate of bbox.
394 | The generated prior image size is the same as bbox.
395 | The lower right point is inclusive.
396 | :param raw_pose:
397 | Unaligned pose of shape (1, 32).
398 | :param save_dir:
399 | Path to save the n nearest priors. If None, do not save.
400 | :param PASCALMaskImgDir:
401 | Path to pascal mask images.
402 | :param pascal_poses:
403 | 2-d array of aligned pascal poses.
404 | :param pascal_img_names:
405 | List of pascal image names.
406 | :param pascal_pose_dict:
407 | Dictionary of (image name : unaligned pose) pairs.
408 | :param n:
409 | Pick n nearest poses.
410 | :param k:
411 | Average nearest k priors.
412 | :param exclude_self:
413 | If True, when the nearest pose for a PASCAL image is looked up among the PASCAL images themselves, the nearest match is the image itself, so the first match is excluded.
414 | If False, the first match is kept.
415 | :return:
416 | The averaged prior. In addition, the n nearest priors are saved to save_dir (if provided).
417 | '''
418 | # raw_pose: [[x1,y1,x2,y2,...,x16,y16]]
419 | aligned_pose = align_torso(raw_pose)
420 | aligned_pose[:, 14:16] = np.tile(np.zeros((1, 2), dtype=float), (1, 1))
421 | distance_index = find_nearest_pose(aligned_pose, pascal_poses)
422 | if exclude_self:
423 | distance_index = distance_index[1:]
424 |
425 | # bbox: [x1, y1, x2, y2]
426 | width = bbox[2] - bbox[0] + 1
427 | height = bbox[3] - bbox[1] + 1
428 | pose = copy.deepcopy(raw_pose)
429 | pose -= np.tile(np.array([bbox[0], bbox[1]]), (1, 16))
430 | pose = pose[0]
431 | origin_size = np.array([int(width), int(height)], dtype=int)
432 | average_parsing = np.zeros((height, width, 3), dtype=float)
433 |
434 | close_pascal_index = distance_index[random_k_within_n(n, k)]
435 | for j in range(n):
436 | if distance_index[j] not in close_pascal_index and save_dir is None:
437 | continue
438 | pascal_name = pascal_img_names[distance_index[j]]
439 | # get PASCAL mask img and morph
440 | print(j, 'picked pascal', pascal_name)
441 | pascal_mask_img = cv2.imread(os.path.join(PASCALMaskImgDir, pascal_name + ".png"), 0)
442 | # cv2.imwrite(os.path.join(save_dir, 'origin_' + str(j) + '_' + pascal_name + '.png'), paint(pascal_mask_img))
443 | pascal_pose = pascal_pose_dict[pascal_name][0]
444 | morphingImg = morphing(pascal_mask_img, pascal_pose, pose, origin_size)
445 | if save_dir is not None:
446 | cv2.imwrite(os.path.join(save_dir, str(j) + '.png'), morphingImg[:, :, [2, 1, 0]])
447 | if distance_index[j] in close_pascal_index:
448 | average_parsing += morphingImg
449 |
450 | average_parsing /= k
451 | return average_parsing
452 |
--------------------------------------------------------------------------------
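find_nearest_pose above ranks the whole PASCAL pose set by squared L2 distance to the aligned query pose, and generate_prior_single_person then morphs and averages k randomly chosen priors among the n nearest. A self-contained sketch of the distance ranking, with made-up toy poses:

import numpy as np

rng = np.random.RandomState(0)
pascal_poses = rng.rand(5, 32)   # 5 aligned poses, 16 (x, y) keypoints each (toy data)
query = rng.rand(1, 32)          # one aligned query pose (toy data)

dist = np.sum(np.square(query - pascal_poses), axis=1)  # squared L2 distance to each pose
order = np.argsort(dist)                                 # index of the closest pose comes first
print(order, dist[order])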