├── .gitignore ├── LICENSE ├── LIP_model.py ├── README.md ├── datasets ├── examples │ ├── images │ │ ├── 114317_456748.jpg │ │ ├── 208597_461278.jpg │ │ ├── 313434_204398.jpg │ │ ├── 342469_423620.jpg │ │ ├── 447689_524975.jpg │ │ └── 76680_475011.jpg │ └── list │ │ └── val.txt └── lip │ ├── create_heatmaps.py │ ├── lip_train_set.csv │ └── list │ ├── train_id.txt │ ├── train_rev.txt │ └── val_id.txt ├── evaluate_parsing_JPPNet-s2.py ├── evaluate_pose_JPPNet-s2.py ├── get_maximum_square_from_segmented_image.py ├── kaffe ├── __init__.py ├── caffe │ ├── __init__.py │ ├── caffe_pb2.py │ └── resolver.py ├── errors.py ├── graph.py ├── layers.py ├── shapes.py ├── tensorflow │ ├── __init__.py │ ├── network.py │ └── transformer.py └── transformers.py ├── train_JPPNet-s2.py └── utils ├── __init__.py ├── image_reader.py ├── lip_reader.py ├── model.py ├── ops.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoint/ 2 | output/ 3 | logs/ 4 | model/ 5 | *.pyc 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Vladimir Nekrasov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LIP_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from utils.ops import * 3 | 4 | 5 | #------------------------network setting--------------------- 6 | ################################################# 7 | 8 | ## refine net version 4. 
07.17 9 | 10 | def pose_net(image, name): 11 | with tf.variable_scope(name) as scope: 12 | is_BN = False 13 | pose_conv1 = conv2d(image, 512, 3, 1, relu=True, bn=is_BN, name='pose_conv1') 14 | pose_conv2 = conv2d(pose_conv1, 512, 3, 1, relu=True, bn=is_BN, name='pose_conv2') 15 | pose_conv3 = conv2d(pose_conv2, 256, 3, 1, relu=True, bn=is_BN, name='pose_conv3') 16 | pose_conv4 = conv2d(pose_conv3, 256, 3, 1, relu=True, bn=is_BN, name='pose_conv4') 17 | pose_conv5 = conv2d(pose_conv4, 256, 3, 1, relu=True, bn=is_BN, name='pose_conv5') 18 | pose_conv6 = conv2d(pose_conv5, 256, 3, 1, relu=True, bn=is_BN, name='pose_conv6') 19 | 20 | pose_conv7 = conv2d(pose_conv6, 512, 1, 1, relu=True, bn=is_BN, name='pose_conv7') 21 | pose_conv8 = conv2d(pose_conv7, 16, 1, 1, relu=False, bn=is_BN, name='pose_conv8') 22 | 23 | return pose_conv8, pose_conv6 24 | 25 | 26 | def pose_refine(pose, parsing, pose_fea, name): 27 | with tf.variable_scope(name) as scope: 28 | is_BN = False 29 | # 1*1 convolution remaps the heatmaps to match the number of channels of the intermediate features. 30 | pose = conv2d(pose, 128, 1, 1, relu=True, bn=is_BN, name='pose_remap') 31 | parsing = conv2d(parsing, 128, 1, 1, relu=True, bn=is_BN, name='parsing_remap') 32 | # concat 33 | pos_par = tf.concat([pose, parsing, pose_fea], 3) 34 | conv1 = conv2d(pos_par, 512, 3, 1, relu=True, bn=is_BN, name='conv1') 35 | conv2 = conv2d(conv1, 256, 5, 1, relu=True, bn=is_BN, name='conv2') 36 | conv3 = conv2d(conv2, 256, 7, 1, relu=True, bn=is_BN, name='conv3') 37 | conv4 = conv2d(conv3, 256, 9, 1, relu=True, bn=is_BN, name='conv4') 38 | 39 | conv5 = conv2d(conv4, 256, 1, 1, relu=True, bn=is_BN, name='conv5') 40 | conv6 = conv2d(conv5, 16, 1, 1, relu=False, bn=is_BN, name='conv6') 41 | 42 | return conv6, conv4 43 | 44 | 45 | def parsing_refine(parsing, pose, parsing_fea, name): 46 | with tf.variable_scope(name) as scope: 47 | is_BN = False 48 | pose = conv2d(pose, 128, 1, 1, relu=True, bn=is_BN, name='pose_remap') 49 | parsing = conv2d(parsing, 128, 1, 1, relu=True, bn=is_BN, name='parsing_remap') 50 | 51 | par_pos = tf.concat([parsing, pose, parsing_fea], 3) 52 | parsing_conv1 = conv2d(par_pos, 512, 3, 1, relu=True, bn=is_BN, name='parsing_conv1') 53 | parsing_conv2 = conv2d(parsing_conv1, 256, 5, 1, relu=True, bn=is_BN, name='parsing_conv2') 54 | parsing_conv3 = conv2d(parsing_conv2, 256, 7, 1, relu=True, bn=is_BN, name='parsing_conv3') 55 | parsing_conv4 = conv2d(parsing_conv3, 256, 9, 1, relu=True, bn=is_BN, name='parsing_conv4') 56 | 57 | parsing_conv5 = conv2d(parsing_conv4, 256, 1, 1, relu=True, bn=is_BN, name='parsing_conv5') 58 | parsing_human1 = atrous_conv2d(parsing_conv5, 20, 3, rate=6, relu=False, name='parsing_human1') 59 | parsing_human2 = atrous_conv2d(parsing_conv5, 20, 3, rate=12, relu=False, name='parsing_human2') 60 | parsing_human3 = atrous_conv2d(parsing_conv5, 20, 3, rate=18, relu=False, name='parsing_human3') 61 | parsing_human4 = atrous_conv2d(parsing_conv5, 20, 3, rate=24, relu=False, name='parsing_human4') 62 | parsing_human = tf.add_n([parsing_human1, parsing_human2, parsing_human3, parsing_human4], name='parsing_human') 63 | 64 | return parsing_human, parsing_conv4 65 | ################################################# 66 | 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Joint Body Parsing & Pose Estimation Network (JPPNet) 2 | Xiaodan Liang, Ke Gong, Xiaohui Shen, and Liang Lin, 
"Look into Person: Joint Body Parsing & Pose Estimation Network and A New Benchmark", T-PAMI 2018. 3 | 4 | ### Introduction 5 | 6 | JPPNet is a state-of-art deep learning methord for human parsing and pose estimation built on top of [Tensorflow](http://www.tensorflow.org). 7 | 8 | This novel joint human parsing and pose estimation network incorporates the multiscale feature connections and iterative location refinement in an end-to-end framework to investigate efficient context modeling and then enable parsing and pose tasks that are mutually beneficial to each other. This unified framework achieves state-of-the-art performance for both human parsing and pose estimation tasks. 9 | 10 | 11 | This distribution provides a publicly available implementation for the key model ingredients reported in our latest [paper](https://arxiv.org/pdf/1804.01984.pdf) which is accepted by T-PAMI 2018. 12 | 13 | We simplify the network to solve human parsing by exploring a novel self-supervised structure-sensitive learning approach, which imposes human pose structures into the parsing results without resorting to extra supervision. There is also a public implementation of this self-supervised structure-sensitive JPPNet ([SS-JPPNet](https://github.com/Engineering-Course/LIP_SSL)). 14 | 15 | 16 | ### Look into People (LIP) Dataset 17 | 18 | The SSL is trained and evaluated on our [LIP dataset](https://lip.sysuhcp.com/) for human parsing. Please check it for more model details. The dataset is also available at [google drive](https://drive.google.com/drive/folders/0BzvH3bSnp3E9QjVYZlhWSjltSWM?resourcekey=0-nkS8bDVjPs3bEw3UZW-omA&usp=sharing) and [baidu drive](http://pan.baidu.com/s/1nvqmZBN). 19 | 20 | 21 | ### Pre-trained models 22 | 23 | We have released our trained models of JPPNet on LIP dataset at [google drive](https://drive.google.com/open?id=1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg) and [baidu drive](https://pan.baidu.com/s/1hQvg1TMIt0JA0yMfjyzQgQ). 24 | 25 | 26 | 27 | 28 | ### Inference 29 | 1. Download the pre-trained model and store in $HOME/checkpoint. 30 | 2. Prepare the images and store in $HOME/datasets. 31 | 3. Run evaluate_pose_JPPNet-s2.py for pose estimation and evaluate_parsing_JPPNet-s2.py for human parsing. 32 | 4. The results are saved in $HOME/output 33 | 34 | ### Training 35 | 1. Download the pre-trained model and store in $HOME/checkpoint. 36 | 2. Download LIP dataset or prepare your own data and store in $HOME/datasets. 37 | 3. For LIP dataset, we have provided images, parsing labels, lists and the left-right flipping labels (labels_rev) for data augmentation. You need to generate the heatmaps of pose labels. We have provided a script for reference. 38 | 4. Run train_JPPNet-s2.py to train the JPPNet with two refinement stages. 39 | 5. Use evaluate_pose_JPPNet-s2.py and evaluate_parsing_JPPNet-s2.py to generate the results or evaluate the trained models. 40 | 6. Note that the LIPReader class is only suit for labels in LIP for the left-right flipping augmentation. If you want to train on other datasets with different labels, you may have to re-write an image reader class. 41 | 42 | ## Citation 43 | If you use this code for your research, please cite our papers. 
44 | ``` 45 | @article{liang2018look, 46 | title={Look into Person: Joint Body Parsing \& Pose Estimation Network and a New Benchmark}, 47 | author={Liang, Xiaodan and Gong, Ke and Shen, Xiaohui and Lin, Liang}, 48 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 49 | year={2018}, 50 | publisher={IEEE} 51 | } 52 | 53 | @InProceedings{Gong_2017_CVPR, 54 | author = {Gong, Ke and Liang, Xiaodan and Zhang, Dongyu and Shen, Xiaohui and Lin, Liang}, 55 | title = {Look Into Person: Self-Supervised Structure-Sensitive Learning and a New Benchmark for Human Parsing}, 56 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 57 | month = {July}, 58 | year = {2017} 59 | } 60 | ``` 61 | 62 | 63 | ## Related work 64 | + Self-supervised Structure-sensitive Learning [SSL](https://github.com/Engineering-Course/LIP_SSL), CVPR2017 65 | + Instance-level Human Parsing via Part Grouping Network [PGN](https://github.com/Engineering-Course/CIHP_PGN), ECCV2018 66 | + Graphonomy: Universal Human Parsing via Graph Transfer Learning [Graphonomy](https://github.com/Gaoyiminggithub/Graphonomy), CVPR2019 67 | -------------------------------------------------------------------------------- /datasets/examples/images/114317_456748.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/114317_456748.jpg -------------------------------------------------------------------------------- /datasets/examples/images/208597_461278.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/208597_461278.jpg -------------------------------------------------------------------------------- /datasets/examples/images/313434_204398.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/313434_204398.jpg -------------------------------------------------------------------------------- /datasets/examples/images/342469_423620.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/342469_423620.jpg -------------------------------------------------------------------------------- /datasets/examples/images/447689_524975.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/447689_524975.jpg -------------------------------------------------------------------------------- /datasets/examples/images/76680_475011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/76680_475011.jpg -------------------------------------------------------------------------------- /datasets/examples/list/val.txt: -------------------------------------------------------------------------------- 1 | /images/114317_456748.jpg 2 | /images/342469_423620.jpg 3 | 
/images/76680_475011.jpg 4 | /images/447689_524975.jpg 5 | /images/208597_461278.jpg 6 | /images/313434_204398.jpg 7 | -------------------------------------------------------------------------------- /datasets/lip/create_heatmaps.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import math 3 | import random 4 | import scipy.misc 5 | import numpy as np 6 | from scipy.stats import multivariate_normal 7 | import scipy.io as sio 8 | import csv 9 | 10 | csv_file = 'lip_train_set.csv' 11 | 12 | with open(csv_file, "r") as input_file: 13 | 14 | for row in csv.reader(input_file): 15 | 16 | img_id = row.pop(0)[:-4] 17 | print img_id 18 | 19 | image_path = './images/{}.jpg'.format(img_id) 20 | img = scipy.misc.imread(image_path).astype(np.float) 21 | rows = img.shape[0] 22 | cols = img.shape[1] 23 | heatmap_ = np.zeros((rows, cols, 16), dtype=np.float64) 24 | 25 | for idx, point in enumerate(row): 26 | if 'nan' in point: 27 | point = 0 28 | if idx % 3 == 0: 29 | c_ = int(point) 30 | c_ = min(c_, cols-1) 31 | c_ = max(c_, 0) 32 | elif idx % 3 == 1 : 33 | r_ = int(point) 34 | r_ = min(r_, rows-1) 35 | r_ = max(r_, 0) 36 | if c_ + r_ > 0: 37 | var = multivariate_normal(mean=[r_, c_], cov=64) 38 | l1 = max(r_-25, 0) 39 | r1 = min(r_+25, rows-1) 40 | l2 = max(c_-25, 0) 41 | r2 = min(c_+25, cols-1) 42 | for i in xrange(l1, r1): 43 | for j in xrange(l2, r2): 44 | heatmap_[i, j, int(idx / 3)] = var.pdf([i, j]) * 400 45 | save_path = './heatmap/{}_{}.png'.format(img_id, int(idx/3)) 46 | scipy.misc.imsave(save_path, heatmap_[:,:,int(idx/3)]) 47 | heatsum_ = np.sum(heatmap_, axis=2) 48 | -------------------------------------------------------------------------------- /evaluate_parsing_JPPNet-s2.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | from datetime import datetime 4 | import os 5 | import sys 6 | import time 7 | import scipy.misc 8 | import cv2 9 | from PIL import Image 10 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 11 | 12 | import tensorflow as tf 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | from utils import * 16 | from LIP_model import * 17 | 18 | N_CLASSES = 20 19 | INPUT_SIZE = (384, 384) 20 | DATA_DIRECTORY = './datasets/examples' 21 | DATA_LIST_PATH = './datasets/examples/list/val.txt' 22 | NUM_STEPS = 6 # Number of images in the validation set. 23 | RESTORE_FROM = './checkpoint/JPPNet-s2' 24 | OUTPUT_DIR = './output/parsing/val' 25 | if not os.path.exists(OUTPUT_DIR): 26 | os.makedirs(OUTPUT_DIR) 27 | 28 | def main(): 29 | """Create the model and start the evaluation process.""" 30 | 31 | # Create queue coordinator. 32 | coord = tf.train.Coordinator() 33 | h, w = INPUT_SIZE 34 | # Load reader. 35 | with tf.name_scope("create_inputs"): 36 | reader = ImageReader(DATA_DIRECTORY, DATA_LIST_PATH, None, False, False, coord) 37 | image = reader.image 38 | image_rev = tf.reverse(image, tf.stack([1])) 39 | image_list = reader.image_list 40 | 41 | image_batch_origin = tf.stack([image, image_rev]) 42 | image_batch = tf.image.resize_images(image_batch_origin, [int(h), int(w)]) 43 | image_batch075 = tf.image.resize_images(image_batch_origin, [int(h * 0.75), int(w * 0.75)]) 44 | image_batch125 = tf.image.resize_images(image_batch_origin, [int(h * 1.25), int(w * 1.25)]) 45 | 46 | # Create network. 
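# The same JPPNet weights are applied at three input scales (1.0x, 0.75x, 1.25x):
# reuse=True shares one set of variables across the scale branches, and the
# per-scale predictions are resized and averaged further below.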
47 | with tf.variable_scope('', reuse=False): 48 | net_100 = JPPNetModel({'data': image_batch}, is_training=False, n_classes=N_CLASSES) 49 | with tf.variable_scope('', reuse=True): 50 | net_075 = JPPNetModel({'data': image_batch075}, is_training=False, n_classes=N_CLASSES) 51 | with tf.variable_scope('', reuse=True): 52 | net_125 = JPPNetModel({'data': image_batch125}, is_training=False, n_classes=N_CLASSES) 53 | 54 | 55 | # parsing net 56 | parsing_fea1_100 = net_100.layers['res5d_branch2b_parsing'] 57 | parsing_fea1_075 = net_075.layers['res5d_branch2b_parsing'] 58 | parsing_fea1_125 = net_125.layers['res5d_branch2b_parsing'] 59 | 60 | parsing_out1_100 = net_100.layers['fc1_human'] 61 | parsing_out1_075 = net_075.layers['fc1_human'] 62 | parsing_out1_125 = net_125.layers['fc1_human'] 63 | 64 | # pose net 65 | resnet_fea_100 = net_100.layers['res4b22_relu'] 66 | resnet_fea_075 = net_075.layers['res4b22_relu'] 67 | resnet_fea_125 = net_125.layers['res4b22_relu'] 68 | 69 | with tf.variable_scope('', reuse=False): 70 | pose_out1_100, pose_fea1_100 = pose_net(resnet_fea_100, 'fc1_pose') 71 | pose_out2_100, pose_fea2_100 = pose_refine(pose_out1_100, parsing_out1_100, pose_fea1_100, name='fc2_pose') 72 | parsing_out2_100, parsing_fea2_100 = parsing_refine(parsing_out1_100, pose_out1_100, parsing_fea1_100, name='fc2_parsing') 73 | parsing_out3_100, parsing_fea3_100 = parsing_refine(parsing_out2_100, pose_out2_100, parsing_fea2_100, name='fc3_parsing') 74 | 75 | with tf.variable_scope('', reuse=True): 76 | pose_out1_075, pose_fea1_075 = pose_net(resnet_fea_075, 'fc1_pose') 77 | pose_out2_075, pose_fea2_075 = pose_refine(pose_out1_075, parsing_out1_075, pose_fea1_075, name='fc2_pose') 78 | parsing_out2_075, parsing_fea2_075 = parsing_refine(parsing_out1_075, pose_out1_075, parsing_fea1_075, name='fc2_parsing') 79 | parsing_out3_075, parsing_fea3_075 = parsing_refine(parsing_out2_075, pose_out2_075, parsing_fea2_075, name='fc3_parsing') 80 | 81 | with tf.variable_scope('', reuse=True): 82 | pose_out1_125, pose_fea1_125 = pose_net(resnet_fea_125, 'fc1_pose') 83 | pose_out2_125, pose_fea2_125 = pose_refine(pose_out1_125, parsing_out1_125, pose_fea1_125, name='fc2_pose') 84 | parsing_out2_125, parsing_fea2_125 = parsing_refine(parsing_out1_125, pose_out1_125, parsing_fea1_125, name='fc2_parsing') 85 | parsing_out3_125, parsing_fea3_125 = parsing_refine(parsing_out2_125, pose_out2_125, parsing_fea2_125, name='fc3_parsing') 86 | 87 | 88 | parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, tf.shape(image_batch_origin)[1:3,]), 89 | tf.image.resize_images(parsing_out1_075, tf.shape(image_batch_origin)[1:3,]), 90 | tf.image.resize_images(parsing_out1_125, tf.shape(image_batch_origin)[1:3,])]), axis=0) 91 | parsing_out2 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out2_100, tf.shape(image_batch_origin)[1:3,]), 92 | tf.image.resize_images(parsing_out2_075, tf.shape(image_batch_origin)[1:3,]), 93 | tf.image.resize_images(parsing_out2_125, tf.shape(image_batch_origin)[1:3,])]), axis=0) 94 | parsing_out3 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out3_100, tf.shape(image_batch_origin)[1:3,]), 95 | tf.image.resize_images(parsing_out3_075, tf.shape(image_batch_origin)[1:3,]), 96 | tf.image.resize_images(parsing_out3_125, tf.shape(image_batch_origin)[1:3,])]), axis=0) 97 | 98 | raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) 99 | head_output, tail_output = tf.unstack(raw_output, num=2, axis=0) 100 | tail_list = 
tf.unstack(tail_output, num=20, axis=2) 101 | tail_list_rev = [None] * 20 102 | for xx in range(14): 103 | tail_list_rev[xx] = tail_list[xx] 104 | tail_list_rev[14] = tail_list[15] 105 | tail_list_rev[15] = tail_list[14] 106 | tail_list_rev[16] = tail_list[17] 107 | tail_list_rev[17] = tail_list[16] 108 | tail_list_rev[18] = tail_list[19] 109 | tail_list_rev[19] = tail_list[18] 110 | tail_output_rev = tf.stack(tail_list_rev, axis=2) 111 | tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1])) 112 | 113 | 114 | raw_output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]), axis=0) 115 | raw_output_all = tf.expand_dims(raw_output_all, dim=0) 116 | raw_output_all = tf.argmax(raw_output_all, dimension=3) 117 | pred_all = tf.expand_dims(raw_output_all, dim=3) # Create 4-d tensor. 118 | 119 | # Which variables to load. 120 | restore_var = tf.global_variables() 121 | # Set up tf session and initialize variables. 122 | config = tf.ConfigProto() 123 | config.gpu_options.allow_growth = True 124 | sess = tf.Session(config=config) 125 | init = tf.global_variables_initializer() 126 | 127 | sess.run(init) 128 | sess.run(tf.local_variables_initializer()) 129 | 130 | # Load weights. 131 | loader = tf.train.Saver(var_list=restore_var) 132 | if RESTORE_FROM is not None: 133 | if load(loader, sess, RESTORE_FROM): 134 | print(" [*] Load SUCCESS") 135 | else: 136 | print(" [!] Load failed...") 137 | 138 | # Start queue threads. 139 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 140 | 141 | 142 | # Iterate over training steps. 143 | for step in range(NUM_STEPS): 144 | parsing_ = sess.run(pred_all) 145 | if step % 100 == 0: 146 | print('step {:d}'.format(step)) 147 | print (image_list[step]) 148 | img_split = image_list[step].split('/') 149 | img_id = img_split[-1][:-4] 150 | 151 | msk = decode_labels(parsing_, num_classes=N_CLASSES) 152 | parsing_im = Image.fromarray(msk[0]) 153 | parsing_im.save('{}/{}_vis.png'.format(OUTPUT_DIR, img_id)) 154 | cv2.imwrite('{}/{}.png'.format(OUTPUT_DIR, img_id), parsing_[0,:,:,0]) 155 | 156 | coord.request_stop() 157 | coord.join(threads) 158 | 159 | if __name__ == '__main__': 160 | main() 161 | 162 | 163 | ##############################################################333 164 | -------------------------------------------------------------------------------- /evaluate_pose_JPPNet-s2.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import time 4 | from glob import glob 5 | import tensorflow as tf 6 | import numpy as np 7 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 8 | from utils import * 9 | from LIP_model import * 10 | import matplotlib.pyplot as plt 11 | import scipy.misc 12 | import scipy.io as sio 13 | 14 | 15 | NUM_STEPS = 6 # Number of images in the validation set. 16 | INPUT_SIZE = (384, 384) 17 | N_CLASSES = 20 18 | DATA_DIRECTORY = './datasets/examples' 19 | DATA_LIST_PATH = './datasets/examples/list/val.txt' 20 | RESTORE_FROM = './checkpoint/JPPNet-s2' 21 | OUTPUT_DIR = './output/pose/val' 22 | if not os.path.exists(OUTPUT_DIR): 23 | os.makedirs(OUTPUT_DIR) 24 | 25 | def main(): 26 | """Create the model and start the evaluation process.""" 27 | 28 | # Create queue coordinator. 29 | coord = tf.train.Coordinator() 30 | h, w = INPUT_SIZE 31 | # Load reader. 
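# Each input image is paired with a horizontally flipped copy (image_rev); the
# flipped prediction is later mapped back (left/right joint channels swapped,
# then re-flipped) and averaged with the original as test-time flip augmentation.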
32 | with tf.name_scope("create_inputs"): 33 | reader = ImageReader(DATA_DIRECTORY, DATA_LIST_PATH, None, False, False, coord) 34 | image = reader.image 35 | image_rev = tf.reverse(image, tf.stack([1])) 36 | image_list = reader.image_list 37 | 38 | image_batch_origin = tf.stack([image, image_rev]) 39 | image_batch = tf.image.resize_images(image_batch_origin, [int(h), int(w)]) 40 | image_batch125 = tf.image.resize_images(image_batch_origin, [int(h * 1.25), int(w * 1.25)]) 41 | image_batch075 = tf.image.resize_images(image_batch_origin, [int(h * 0.75), int(w * 0.75)]) 42 | 43 | # Create network. 44 | with tf.variable_scope('', reuse=False): 45 | net_100 = JPPNetModel({'data': image_batch}, is_training=False, n_classes=N_CLASSES) 46 | with tf.variable_scope('', reuse=True): 47 | net_125 = JPPNetModel({'data': image_batch125}, is_training=False, n_classes=N_CLASSES) 48 | with tf.variable_scope('', reuse=True): 49 | net_075 = JPPNetModel({'data': image_batch075}, is_training=False, n_classes=N_CLASSES) 50 | 51 | 52 | # parsing net 53 | parsing_fea1_100 = net_100.layers['res5d_branch2b_parsing'] 54 | parsing_fea1_125 = net_125.layers['res5d_branch2b_parsing'] 55 | parsing_fea1_075 = net_075.layers['res5d_branch2b_parsing'] 56 | 57 | parsing_out1_100 = net_100.layers['fc1_human'] 58 | parsing_out1_125 = net_125.layers['fc1_human'] 59 | parsing_out1_075 = net_075.layers['fc1_human'] 60 | 61 | # pose net 62 | resnet_fea_100 = net_100.layers['res4b22_relu'] 63 | resnet_fea_125 = net_125.layers['res4b22_relu'] 64 | resnet_fea_075 = net_075.layers['res4b22_relu'] 65 | 66 | with tf.variable_scope('', reuse=False): 67 | pose_out1_100, pose_fea1_100 = pose_net(resnet_fea_100, 'fc1_pose') 68 | pose_out2_100, pose_fea2_100 = pose_refine(pose_out1_100, parsing_out1_100, pose_fea1_100, name='fc2_pose') 69 | parsing_out2_100, parsing_fea2_100 = parsing_refine(parsing_out1_100, pose_out1_100, parsing_fea1_100, name='fc2_parsing') 70 | pose_out3_100, pose_fea3_100 = pose_refine(pose_out2_100, parsing_out2_100, pose_fea2_100, name='fc3_pose') 71 | 72 | with tf.variable_scope('', reuse=True): 73 | pose_out1_125, pose_fea1_125 = pose_net(resnet_fea_125, 'fc1_pose') 74 | pose_out2_125, pose_fea2_125 = pose_refine(pose_out1_125, parsing_out1_125, pose_fea1_125, name='fc2_pose') 75 | parsing_out2_125, parsing_fea2_125 = parsing_refine(parsing_out1_125, pose_out1_125, parsing_fea1_125, name='fc2_parsing') 76 | pose_out3_125, pose_fea3_125 = pose_refine(pose_out2_125, parsing_out2_125, pose_fea2_125, name='fc3_pose') 77 | 78 | with tf.variable_scope('', reuse=True): 79 | pose_out1_075, pose_fea1_075 = pose_net(resnet_fea_075, 'fc1_pose') 80 | pose_out2_075, pose_fea2_075 = pose_refine(pose_out1_075, parsing_out1_075, pose_fea1_075, name='fc2_pose') 81 | parsing_out2_075, parsing_fea2_075 = parsing_refine(parsing_out1_075, pose_out1_075, parsing_fea1_075, name='fc2_parsing') 82 | pose_out3_075, pose_fea3_075 = pose_refine(pose_out2_075, parsing_out2_075, pose_fea2_075, name='fc3_pose') 83 | 84 | 85 | pose_out3 = tf.reduce_mean(tf.stack([tf.image.resize_nearest_neighbor(pose_out3_100, tf.shape(image_batch_origin)[1:3,]), 86 | tf.image.resize_nearest_neighbor(pose_out3_125, tf.shape(image_batch_origin)[1:3,]), 87 | tf.image.resize_nearest_neighbor(pose_out3_075, tf.shape(image_batch_origin)[1:3,])]), axis=0) 88 | 89 | head_output, tail_output = tf.unstack(pose_out3, num=2, axis=0) 90 | tail_list = tf.unstack(tail_output, num=16, axis=2) 91 | tail_list_rev = [None] * 16 92 | tail_list_rev[0] = tail_list[5] 93 | 
tail_list_rev[1] = tail_list[4] 94 | tail_list_rev[2] = tail_list[3] 95 | tail_list_rev[3] = tail_list[2] 96 | tail_list_rev[4] = tail_list[1] 97 | tail_list_rev[5] = tail_list[0] 98 | tail_list_rev[10] = tail_list[15] 99 | tail_list_rev[11] = tail_list[14] 100 | tail_list_rev[12] = tail_list[13] 101 | tail_list_rev[13] = tail_list[12] 102 | tail_list_rev[14] = tail_list[11] 103 | tail_list_rev[15] = tail_list[10] 104 | tail_list_rev[6] = tail_list[6] 105 | tail_list_rev[7] = tail_list[7] 106 | tail_list_rev[8] = tail_list[8] 107 | tail_list_rev[9] = tail_list[9] 108 | tail_output_rev = tf.stack(tail_list_rev, axis=2) 109 | tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1])) 110 | 111 | output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]), axis=0) 112 | output_all = tf.expand_dims(output_all, dim=0) 113 | 114 | # Which variables to load. 115 | restore_var = tf.global_variables() 116 | 117 | # Set up tf session and initialize variables. 118 | config = tf.ConfigProto() 119 | config.gpu_options.allow_growth = True 120 | sess = tf.Session(config=config) 121 | init = tf.global_variables_initializer() 122 | 123 | sess.run(init) 124 | sess.run(tf.local_variables_initializer()) 125 | 126 | # Load weights. 127 | loader = tf.train.Saver(var_list=restore_var) 128 | if RESTORE_FROM is not None: 129 | if load(loader, sess, RESTORE_FROM): 130 | print(" [*] Load SUCCESS") 131 | else: 132 | print(" [!] Load failed...") 133 | 134 | # Start queue threads. 135 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 136 | 137 | 138 | # Iterate over training steps. 139 | for step in range(NUM_STEPS): 140 | predict_ = sess.run(output_all) 141 | save_lip_images(image_list[step], predict_, OUTPUT_DIR) 142 | if step % 100 == 0: 143 | print('step {:d}'.format(step)) 144 | print (image_list[step]) 145 | 146 | coord.request_stop() 147 | coord.join(threads) 148 | 149 | 150 | def save_lip_images(image_path, samples, out_dir): 151 | img_A = scipy.misc.imread(image_path).astype(np.float) 152 | rows = img_A.shape[0] 153 | cols = img_A.shape[1] 154 | image = samples[0] 155 | img_split = image_path.split('/') 156 | img_id = img_split[-1][:-4] 157 | with open('{}/{}.txt'.format(out_dir, img_id), 'w') as f: 158 | for p in xrange(image.shape[2]): 159 | channel_ = image[:,:,p] 160 | if channel_.shape[0] != rows or channel_.shape[1] != cols: 161 | print ('sizes do not match...') 162 | channel_ = scipy.misc.imresize(channel_, [rows, cols], interp='nearest') 163 | r_, c_ = np.unravel_index(channel_.argmax(), channel_.shape) 164 | f.write('%d %d ' % (int(c_), int(r_))) 165 | 166 | 167 | if __name__ == '__main__': 168 | main() 169 | 170 | 171 | -------------------------------------------------------------------------------- /get_maximum_square_from_segmented_image.py: -------------------------------------------------------------------------------- 1 | # This function is used to get the largest square from the cropped and segmented image. It can be further used to find patterns 2 | import cv2 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from PIL import Image 6 | import time 7 | from collections import namedtuple 8 | import glob 9 | 10 | def printMaxSubSquare(M): 11 | """" find the largest square """ 12 | R = len(M) # no. of rows in M[][] 13 | C = len(M[0]) # no. 
of columns in M[][] 14 | 15 | S = [[0 for k in range(C)] for l in range(R)] 16 | # here we have set the first row and column of S[][] 17 | 18 | # Construct other entries 19 | for i in range(1, R): 20 | for j in range(1, C): 21 | if (M[i][j] == 1): 22 | S[i][j] = min(S[i][j-1], S[i-1][j], 23 | S[i-1][j-1]) + 1 24 | else: 25 | S[i][j] = 0 26 | 27 | # Find the maximum entry and 28 | # indices of maximum entry in S[][] 29 | max_of_s = S[0][0] 30 | max_i = 0 31 | max_j = 0 32 | for i in range(R): 33 | for j in range(C): 34 | if (max_of_s < S[i][j]): 35 | max_of_s = S[i][j] 36 | max_i = i 37 | max_j = j 38 | 39 | print("Maximum size sub-matrix is: ") 40 | count_i = 0 41 | count_j = 0 42 | position_matrix = [] 43 | for i in range(max_i, max_i - max_of_s, -1): 44 | for j in range(max_j, max_j - max_of_s, -1): 45 | position_matrix.append((i,j)) 46 | count_i+=1 47 | 48 | print('count_i :' + str(count_i)) 49 | print('count_j :' + str(count_j)) 50 | return position_matrix 51 | 52 | 53 | def crop_square_portion(image_file_name): 54 | """" crop and save image """ 55 | image_file_name_list = image_file_name.split('_') 56 | vis_file_name = '_'.join(image_file_name_list[:2])+'_vis.png' 57 | save_file_name = '_'.join(image_file_name_list[:3])+'_square.png' 58 | cloth_type = image_file_name_list[-2] 59 | list_index = cloth_type_list.index(cloth_type) 60 | light_shade = light_shade_list[list_index] 61 | dark_shade = dark_shade_list[list_index] 62 | print(light_shade,dark_shade) 63 | #read input image 64 | img = cv2.imread(INPUT_DIR+vis_file_name,cv2.COLOR_BGR2RGB) 65 | 66 | #detect shades from vis: 67 | hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) 68 | mask = cv2.inRange(hsv, light_shade, dark_shade) 69 | 70 | #coverting to binary array: 71 | np_img = np.array(mask) 72 | np_img[np_img == 255] = 1 73 | 74 | #coverting to binary array: 75 | np_img = np.array(mask) 76 | np_img[np_img == 255] = 1 77 | 78 | #find and plot the largest square 79 | var = printMaxSubSquare(np_img) 80 | for point in var: 81 | a,b = point 82 | pt = (b,a) 83 | cv2.circle(np_img,pt,5,(200,0,0),2) 84 | 85 | ##convert mask to bunary mask 86 | np_img[np_img != 200] = 0 87 | print('final mask shape:') 88 | print(np_img.shape) 89 | 90 | ##crop and save the square image 91 | img = cv2.imread(INPUT_DIR+image_file_name,cv2.COLOR_BGR2RGB) 92 | print('input image shape:') 93 | print(img.shape) 94 | x,y,w,h = cv2.boundingRect(np_img) 95 | crop_img = img[y:y+h,x:x+w] 96 | print('cropped image shape:') 97 | print(crop_img.shape) 98 | cv2.imwrite(OUTPUT_DIR+save_file_name, crop_img) 99 | 100 | 101 | if __name__ == "__main__": 102 | INPUT_DIR = r' set your input folder where segmented images are there' 103 | OUTPUT_DIR = r' set your output images' 104 | cloth_type_list = ['UpperClothes','Dress','Pants','Scarf','Skirt','Coat'] 105 | light_shade_list = [(100, 240, 255),(0,255,70),(0,255,70),(10,150,125),(50,0,70),(10,100,200)] 106 | dark_shade_list = [(190, 255, 255),(0,255,200),(100,255,200),(100,160,130),(60,255,200),(20,255,255)] 107 | 108 | #for each bgcropped file read, pass to crop_image function 109 | for file in glob.glob(INPUT_DIR+'*_cropped.png'): 110 | print(file) 111 | image_file_name = file.split('\\')[-1] 112 | crop_square_portion(image_file_name) 113 | 114 | -------------------------------------------------------------------------------- /kaffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph import GraphBuilder, NodeMapper 2 | from .errors import KaffeError, print_stderr 3 | 4 | 
from . import tensorflow 5 | -------------------------------------------------------------------------------- /kaffe/caffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .resolver import get_caffe_resolver, has_pycaffe 2 | -------------------------------------------------------------------------------- /kaffe/caffe/resolver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | SHARED_CAFFE_RESOLVER = None 4 | 5 | class CaffeResolver(object): 6 | def __init__(self): 7 | self.import_caffe() 8 | 9 | def import_caffe(self): 10 | self.caffe = None 11 | try: 12 | # Try to import PyCaffe first 13 | import caffe 14 | self.caffe = caffe 15 | except ImportError: 16 | # Fall back to the protobuf implementation 17 | from . import caffe_pb2 18 | self.caffepb = caffe_pb2 19 | show_fallback_warning() 20 | if self.caffe: 21 | # Use the protobuf code from the imported distribution. 22 | # This way, Caffe variants with custom layers will work. 23 | self.caffepb = self.caffe.proto.caffe_pb2 24 | self.NetParameter = self.caffepb.NetParameter 25 | 26 | def has_pycaffe(self): 27 | return self.caffe is not None 28 | 29 | def get_caffe_resolver(): 30 | global SHARED_CAFFE_RESOLVER 31 | if SHARED_CAFFE_RESOLVER is None: 32 | SHARED_CAFFE_RESOLVER = CaffeResolver() 33 | return SHARED_CAFFE_RESOLVER 34 | 35 | def has_pycaffe(): 36 | return get_caffe_resolver().has_pycaffe() 37 | 38 | def show_fallback_warning(): 39 | msg = ''' 40 | ------------------------------------------------------------ 41 | WARNING: PyCaffe not found! 42 | Falling back to a pure protocol buffer implementation. 43 | * Conversions will be drastically slower. 44 | * This backend is UNTESTED! 45 | ------------------------------------------------------------ 46 | 47 | ''' 48 | sys.stderr.write(msg) 49 | -------------------------------------------------------------------------------- /kaffe/errors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class KaffeError(Exception): 4 | pass 5 | 6 | def print_stderr(msg): 7 | sys.stderr.write('%s\n' % msg) 8 | -------------------------------------------------------------------------------- /kaffe/graph.py: -------------------------------------------------------------------------------- 1 | from google.protobuf import text_format 2 | 3 | from .caffe import get_caffe_resolver 4 | from .errors import KaffeError, print_stderr 5 | from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch 6 | from .shapes import TensorShape 7 | 8 | class Node(object): 9 | 10 | def __init__(self, name, kind, layer=None): 11 | self.name = name 12 | self.kind = kind 13 | self.layer = LayerAdapter(layer, kind) if layer else None 14 | self.parents = [] 15 | self.children = [] 16 | self.data = None 17 | self.output_shape = None 18 | self.metadata = {} 19 | 20 | def add_parent(self, parent_node): 21 | assert parent_node not in self.parents 22 | self.parents.append(parent_node) 23 | if self not in parent_node.children: 24 | parent_node.children.append(self) 25 | 26 | def add_child(self, child_node): 27 | assert child_node not in self.children 28 | self.children.append(child_node) 29 | if self not in child_node.parents: 30 | child_node.parents.append(self) 31 | 32 | def get_only_parent(self): 33 | if len(self.parents) != 1: 34 | raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' 
% 35 | (self, len(self.parents))) 36 | return self.parents[0] 37 | 38 | @property 39 | def parameters(self): 40 | if self.layer is not None: 41 | return self.layer.parameters 42 | return None 43 | 44 | def __str__(self): 45 | return '[%s] %s' % (self.kind, self.name) 46 | 47 | def __repr__(self): 48 | return '%s (0x%x)' % (self.name, id(self)) 49 | 50 | 51 | class Graph(object): 52 | 53 | def __init__(self, nodes=None, name=None): 54 | self.nodes = nodes or [] 55 | self.node_lut = {node.name: node for node in self.nodes} 56 | self.name = name 57 | 58 | def add_node(self, node): 59 | self.nodes.append(node) 60 | self.node_lut[node.name] = node 61 | 62 | def get_node(self, name): 63 | try: 64 | return self.node_lut[name] 65 | except KeyError: 66 | raise KaffeError('Layer not found: %s' % name) 67 | 68 | def get_input_nodes(self): 69 | return [node for node in self.nodes if len(node.parents) == 0] 70 | 71 | def get_output_nodes(self): 72 | return [node for node in self.nodes if len(node.children) == 0] 73 | 74 | def topologically_sorted(self): 75 | sorted_nodes = [] 76 | unsorted_nodes = list(self.nodes) 77 | temp_marked = set() 78 | perm_marked = set() 79 | 80 | def visit(node): 81 | if node in temp_marked: 82 | raise KaffeError('Graph is not a DAG.') 83 | if node in perm_marked: 84 | return 85 | temp_marked.add(node) 86 | for child in node.children: 87 | visit(child) 88 | perm_marked.add(node) 89 | temp_marked.remove(node) 90 | sorted_nodes.insert(0, node) 91 | 92 | while len(unsorted_nodes): 93 | visit(unsorted_nodes.pop()) 94 | return sorted_nodes 95 | 96 | def compute_output_shapes(self): 97 | sorted_nodes = self.topologically_sorted() 98 | for node in sorted_nodes: 99 | node.output_shape = TensorShape(*NodeKind.compute_output_shape(node)) 100 | 101 | def replaced(self, new_nodes): 102 | return Graph(nodes=new_nodes, name=self.name) 103 | 104 | def transformed(self, transformers): 105 | graph = self 106 | for transformer in transformers: 107 | graph = transformer(graph) 108 | if graph is None: 109 | raise KaffeError('Transformer failed: {}'.format(transformer)) 110 | assert isinstance(graph, Graph) 111 | return graph 112 | 113 | def __contains__(self, key): 114 | return key in self.node_lut 115 | 116 | def __str__(self): 117 | hdr = '{:<20} {:<30} {:>20} {:>20}'.format('Type', 'Name', 'Param', 'Output') 118 | s = [hdr, '-' * 94] 119 | for node in self.topologically_sorted(): 120 | # If the node has learned parameters, display the first one's shape. 121 | # In case of convolutions, this corresponds to the weights. 122 | data_shape = node.data[0].shape if node.data else '--' 123 | out_shape = node.output_shape or '--' 124 | s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape, 125 | tuple(out_shape))) 126 | return '\n'.join(s) 127 | 128 | 129 | class GraphBuilder(object): 130 | '''Constructs a model graph from a Caffe protocol buffer definition.''' 131 | 132 | def __init__(self, def_path, phase='test'): 133 | ''' 134 | def_path: Path to the model definition (.prototxt) 135 | data_path: Path to the model data (.caffemodel) 136 | phase: Either 'test' or 'train'. Used for filtering phase-specific nodes. 
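Only def_path and phase are actual constructor arguments here; the .caffemodel
data referred to by data_path is loaded separately by the transformers.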
137 | ''' 138 | self.def_path = def_path 139 | self.phase = phase 140 | self.load() 141 | 142 | def load(self): 143 | '''Load the layer definitions from the prototxt.''' 144 | self.params = get_caffe_resolver().NetParameter() 145 | with open(self.def_path, 'rb') as def_file: 146 | text_format.Merge(def_file.read(), self.params) 147 | 148 | def filter_layers(self, layers): 149 | '''Filter out layers based on the current phase.''' 150 | phase_map = {0: 'train', 1: 'test'} 151 | filtered_layer_names = set() 152 | filtered_layers = [] 153 | for layer in layers: 154 | phase = self.phase 155 | if len(layer.include): 156 | phase = phase_map[layer.include[0].phase] 157 | if len(layer.exclude): 158 | phase = phase_map[1 - layer.include[0].phase] 159 | exclude = (phase != self.phase) 160 | # Dropout layers appear in a fair number of Caffe 161 | # test-time networks. These are just ignored. We'll 162 | # filter them out here. 163 | if (not exclude) and (phase == 'test'): 164 | exclude = (layer.type == LayerType.Dropout) 165 | if not exclude: 166 | filtered_layers.append(layer) 167 | # Guard against dupes. 168 | assert layer.name not in filtered_layer_names 169 | filtered_layer_names.add(layer.name) 170 | return filtered_layers 171 | 172 | def make_node(self, layer): 173 | '''Create a graph node for the given layer.''' 174 | kind = NodeKind.map_raw_kind(layer.type) 175 | if kind is None: 176 | raise KaffeError('Unknown layer type encountered: %s' % layer.type) 177 | # We want to use the layer's top names (the "output" names), rather than the 178 | # name attribute, which is more of readability thing than a functional one. 179 | # Other layers will refer to a node by its "top name". 180 | return Node(layer.name, kind, layer=layer) 181 | 182 | def make_input_nodes(self): 183 | ''' 184 | Create data input nodes. 185 | 186 | This method is for old-style inputs, where the input specification 187 | was not treated as a first-class layer in the prototext. 188 | Newer models use the "Input layer" type. 189 | ''' 190 | nodes = [Node(name, NodeKind.Data) for name in self.params.input] 191 | if len(nodes): 192 | input_dim = map(int, self.params.input_dim) 193 | if not input_dim: 194 | if len(self.params.input_shape) > 0: 195 | input_dim = map(int, self.params.input_shape[0].dim) 196 | else: 197 | raise KaffeError('Dimensions for input not specified.') 198 | for node in nodes: 199 | node.output_shape = tuple(input_dim) 200 | return nodes 201 | 202 | def build(self): 203 | ''' 204 | Builds the graph from the Caffe layer definitions. 205 | ''' 206 | # Get the layers 207 | layers = self.params.layers or self.params.layer 208 | # Filter out phase-excluded layers 209 | layers = self.filter_layers(layers) 210 | # Get any separately-specified input layers 211 | nodes = self.make_input_nodes() 212 | nodes += [self.make_node(layer) for layer in layers] 213 | # Initialize the graph 214 | graph = Graph(nodes=nodes, name=self.params.name) 215 | # Connect the nodes 216 | # 217 | # A note on layers and outputs: 218 | # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs 219 | # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom 220 | # (in case of in-place operations). Note that the layer's name is not used for establishing 221 | # any connectivity. It's only used for data association. By convention, a layer with a 222 | # single top will often use the same name (although this is not required). 
223 | # 224 | # The current implementation only supports single-output nodes (note that a node can still 225 | # have multiple children, since multiple child nodes can refer to the single top's name). 226 | node_outputs = {} 227 | for layer in layers: 228 | node = graph.get_node(layer.name) 229 | for input_name in layer.bottom: 230 | assert input_name != layer.name 231 | parent_node = node_outputs.get(input_name) 232 | if (parent_node is None) or (parent_node == node): 233 | parent_node = graph.get_node(input_name) 234 | node.add_parent(parent_node) 235 | if len(layer.top)>1: 236 | raise KaffeError('Multiple top nodes are not supported.') 237 | for output_name in layer.top: 238 | if output_name == layer.name: 239 | # Output is named the same as the node. No further action required. 240 | continue 241 | # There are two possibilities here: 242 | # 243 | # Case 1: output_name refers to another node in the graph. 244 | # This is an "in-place operation" that overwrites an existing node. 245 | # This would create a cycle in the graph. We'll undo the in-placing 246 | # by substituting this node wherever the overwritten node is referenced. 247 | # 248 | # Case 2: output_name violates the convention layer.name == output_name. 249 | # Since we are working in the single-output regime, we will can rename it to 250 | # match the layer name. 251 | # 252 | # For both cases, future references to this top re-routes to this node. 253 | node_outputs[output_name] = node 254 | 255 | graph.compute_output_shapes() 256 | return graph 257 | 258 | 259 | class NodeMapper(NodeDispatch): 260 | 261 | def __init__(self, graph): 262 | self.graph = graph 263 | 264 | def map(self): 265 | nodes = self.graph.topologically_sorted() 266 | # Remove input nodes - we'll handle them separately. 267 | input_nodes = self.graph.get_input_nodes() 268 | nodes = [t for t in nodes if t not in input_nodes] 269 | # Decompose DAG into chains. 270 | chains = [] 271 | for node in nodes: 272 | attach_to_chain = None 273 | if len(node.parents) == 1: 274 | parent = node.get_only_parent() 275 | for chain in chains: 276 | if chain[-1] == parent: 277 | # Node is part of an existing chain. 278 | attach_to_chain = chain 279 | break 280 | if attach_to_chain is None: 281 | # Start a new chain for this node. 282 | attach_to_chain = [] 283 | chains.append(attach_to_chain) 284 | attach_to_chain.append(node) 285 | # Map each chain. 
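# map_chain() translates each node in a chain through its kind-specific
# handler; commit() (implemented by the backend subclass) assembles the
# mapped chains into the final output.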
286 | mapped_chains = [] 287 | for chain in chains: 288 | mapped_chains.append(self.map_chain(chain)) 289 | return self.commit(mapped_chains) 290 | 291 | def map_chain(self, chain): 292 | return [self.map_node(node) for node in chain] 293 | 294 | def map_node(self, node): 295 | map_func = self.get_handler(node.kind, 'map') 296 | mapped_node = map_func(node) 297 | assert mapped_node is not None 298 | mapped_node.node = node 299 | return mapped_node 300 | 301 | def commit(self, mapped_chains): 302 | raise NotImplementedError('Must be implemented by subclass.') 303 | -------------------------------------------------------------------------------- /kaffe/layers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numbers 3 | from collections import namedtuple 4 | 5 | from .shapes import * 6 | 7 | LAYER_DESCRIPTORS = { 8 | 9 | # Caffe Types 10 | 'AbsVal': shape_identity, 11 | 'Accuracy': shape_scalar, 12 | 'ArgMax': shape_not_implemented, 13 | 'BatchNorm': shape_identity, 14 | 'BNLL': shape_not_implemented, 15 | 'Concat': shape_concat, 16 | 'ContrastiveLoss': shape_scalar, 17 | 'Convolution': shape_convolution, 18 | 'Deconvolution': shape_not_implemented, 19 | 'Data': shape_data, 20 | 'Dropout': shape_identity, 21 | 'DummyData': shape_data, 22 | 'EuclideanLoss': shape_scalar, 23 | 'Eltwise': shape_identity, 24 | 'Exp': shape_identity, 25 | 'Flatten': shape_not_implemented, 26 | 'HDF5Data': shape_data, 27 | 'HDF5Output': shape_identity, 28 | 'HingeLoss': shape_scalar, 29 | 'Im2col': shape_not_implemented, 30 | 'ImageData': shape_data, 31 | 'InfogainLoss': shape_scalar, 32 | 'InnerProduct': shape_inner_product, 33 | 'Input': shape_data, 34 | 'LRN': shape_identity, 35 | 'MemoryData': shape_mem_data, 36 | 'MultinomialLogisticLoss': shape_scalar, 37 | 'MVN': shape_not_implemented, 38 | 'Pooling': shape_pool, 39 | 'Power': shape_identity, 40 | 'ReLU': shape_identity, 41 | 'Scale': shape_identity, 42 | 'Sigmoid': shape_identity, 43 | 'SigmoidCrossEntropyLoss': shape_scalar, 44 | 'Silence': shape_not_implemented, 45 | 'Softmax': shape_identity, 46 | 'SoftmaxWithLoss': shape_scalar, 47 | 'Split': shape_not_implemented, 48 | 'Slice': shape_not_implemented, 49 | 'TanH': shape_identity, 50 | 'WindowData': shape_not_implemented, 51 | 'Threshold': shape_identity, 52 | 'Interp': shape_not_implemented, 53 | 'SpatialProduct': shape_not_implemented 54 | } 55 | 56 | LAYER_TYPES = LAYER_DESCRIPTORS.keys() 57 | 58 | LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES}) 59 | 60 | class NodeKind(LayerType): 61 | 62 | @staticmethod 63 | def map_raw_kind(kind): 64 | if kind in LAYER_TYPES: 65 | return kind 66 | return None 67 | 68 | @staticmethod 69 | def compute_output_shape(node): 70 | try: 71 | val = LAYER_DESCRIPTORS[node.kind](node) 72 | return val 73 | except NotImplementedError: 74 | raise KaffeError('Output shape computation not implemented for type: %s' % node.kind) 75 | 76 | 77 | class NodeDispatchError(KaffeError): 78 | 79 | pass 80 | 81 | 82 | class NodeDispatch(object): 83 | 84 | @staticmethod 85 | def get_handler_name(node_kind): 86 | if len(node_kind) <= 4: 87 | # A catch-all for things like ReLU and tanh 88 | return node_kind.lower() 89 | # Convert from CamelCase to under_scored 90 | name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', node_kind) 91 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() 92 | 93 | def get_handler(self, node_kind, prefix): 94 | name = self.get_handler_name(node_kind) 95 | name = '_'.join((prefix, name)) 96 | 
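# e.g. a node kind of 'Convolution' with prefix 'map' resolves to the
# method name 'map_convolution'.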
try: 97 | return getattr(self, name) 98 | except AttributeError: 99 | raise NodeDispatchError('No handler found for node kind: %s (expected: %s)' % 100 | (node_kind, name)) 101 | 102 | 103 | class LayerAdapter(object): 104 | 105 | def __init__(self, layer, kind): 106 | self.layer = layer 107 | self.kind = kind 108 | 109 | @property 110 | def parameters(self): 111 | name = NodeDispatch.get_handler_name(self.kind) 112 | name = '_'.join((name, 'param')) 113 | try: 114 | return getattr(self.layer, name) 115 | except AttributeError: 116 | raise NodeDispatchError('Caffe parameters not found for layer kind: %s' % (self.kind)) 117 | 118 | @staticmethod 119 | def get_kernel_value(scalar, repeated, idx, default=None): 120 | if scalar: 121 | return scalar 122 | if repeated: 123 | if isinstance(repeated, numbers.Number): 124 | return repeated 125 | if len(repeated) == 1: 126 | # Same value applies to all spatial dimensions 127 | return int(repeated[0]) 128 | assert idx < len(repeated) 129 | # Extract the value for the given spatial dimension 130 | return repeated[idx] 131 | if default is None: 132 | raise ValueError('Unable to determine kernel parameter!') 133 | return default 134 | 135 | @property 136 | def kernel_parameters(self): 137 | assert self.kind in (NodeKind.Convolution, NodeKind.Pooling) 138 | params = self.parameters 139 | k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0) 140 | k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1) 141 | s_h = self.get_kernel_value(params.stride_h, params.stride, 0, default=1) 142 | s_w = self.get_kernel_value(params.stride_w, params.stride, 1, default=1) 143 | p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0) 144 | p_w = self.get_kernel_value(params.pad_h, params.pad, 1, default=0) 145 | return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w) 146 | 147 | 148 | KernelParameters = namedtuple('KernelParameters', ['kernel_h', 'kernel_w', 'stride_h', 'stride_w', 149 | 'pad_h', 'pad_w']) 150 | -------------------------------------------------------------------------------- /kaffe/shapes.py: -------------------------------------------------------------------------------- 1 | import math 2 | from collections import namedtuple 3 | 4 | from .errors import KaffeError 5 | 6 | TensorShape = namedtuple('TensorShape', ['batch_size', 'channels', 'height', 'width']) 7 | 8 | 9 | def get_filter_output_shape(i_h, i_w, params, round_func): 10 | o_h = (i_h + 2 * params.pad_h - params.kernel_h) / float(params.stride_h) + 1 11 | o_w = (i_w + 2 * params.pad_w - params.kernel_w) / float(params.stride_w) + 1 12 | return (int(round_func(o_h)), int(round_func(o_w))) 13 | 14 | 15 | def get_strided_kernel_output_shape(node, round_func): 16 | assert node.layer is not None 17 | input_shape = node.get_only_parent().output_shape 18 | o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width, 19 | node.layer.kernel_parameters, round_func) 20 | params = node.layer.parameters 21 | has_c_o = hasattr(params, 'num_output') 22 | c = params.num_output if has_c_o else input_shape.channels 23 | return TensorShape(input_shape.batch_size, c, o_h, o_w) 24 | 25 | 26 | def shape_not_implemented(node): 27 | raise NotImplementedError 28 | 29 | 30 | def shape_identity(node): 31 | assert len(node.parents) > 0 32 | return node.parents[0].output_shape 33 | 34 | 35 | def shape_scalar(node): 36 | return TensorShape(1, 1, 1, 1) 37 | 38 | 39 | def shape_data(node): 40 | if node.output_shape: 41 | # Old-style input specification 42 | return 
node.output_shape 43 | try: 44 | # New-style input specification 45 | return map(int, node.parameters.shape[0].dim) 46 | except: 47 | # We most likely have a data layer on our hands. The problem is, 48 | # Caffe infers the dimensions of the data from the source (eg: LMDB). 49 | # We want to avoid reading datasets here. Fail for now. 50 | # This can be temporarily fixed by transforming the data layer to 51 | # Caffe's "input" layer (as is usually used in the "deploy" version). 52 | # TODO: Find a better solution for this. 53 | raise KaffeError('Cannot determine dimensions of data layer.\n' 54 | 'See comments in function shape_data for more info.') 55 | 56 | 57 | def shape_mem_data(node): 58 | params = node.parameters 59 | return TensorShape(params.batch_size, params.channels, params.height, params.width) 60 | 61 | 62 | def shape_concat(node): 63 | axis = node.layer.parameters.axis 64 | output_shape = None 65 | for parent in node.parents: 66 | if output_shape is None: 67 | output_shape = list(parent.output_shape) 68 | else: 69 | output_shape[axis] += parent.output_shape[axis] 70 | return tuple(output_shape) 71 | 72 | 73 | def shape_convolution(node): 74 | return get_strided_kernel_output_shape(node, math.floor) 75 | 76 | 77 | def shape_pool(node): 78 | return get_strided_kernel_output_shape(node, math.ceil) 79 | 80 | 81 | def shape_inner_product(node): 82 | input_shape = node.get_only_parent().output_shape 83 | return TensorShape(input_shape.batch_size, node.layer.parameters.num_output, 1, 1) 84 | -------------------------------------------------------------------------------- /kaffe/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import TensorFlowTransformer 2 | from .network import Network 3 | -------------------------------------------------------------------------------- /kaffe/tensorflow/network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | slim = tf.contrib.slim 4 | 5 | DEFAULT_PADDING = 'SAME' 6 | 7 | 8 | def layer(op): 9 | '''Decorator for composable network layers.''' 10 | 11 | def layer_decorated(self, *args, **kwargs): 12 | # Automatically set a name if not provided. 13 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 14 | # Figure out the layer inputs. 15 | if len(self.terminals) == 0: 16 | raise RuntimeError('No input variables found for layer %s.' % name) 17 | elif len(self.terminals) == 1: 18 | layer_input = self.terminals[0] 19 | else: 20 | layer_input = list(self.terminals) 21 | # Perform the operation and get the output. 22 | layer_output = op(self, layer_input, *args, **kwargs) 23 | # Add to layer LUT. 24 | self.layers[name] = layer_output 25 | # This output is now the input for the next layer. 26 | self.feed(layer_output) 27 | # Return self for chained calls. 
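# Returning self enables the fluent chained style used by the converted models,
# e.g. self.feed('data').conv(...).max_pool(...).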
28 | return self 29 | 30 | return layer_decorated 31 | 32 | 33 | class Network(object): 34 | 35 | def __init__(self, inputs, trainable=True, is_training=False, n_classes=20): 36 | # The input nodes for this network 37 | self.inputs = inputs 38 | # The current list of terminal nodes 39 | self.terminals = [] 40 | # Mapping from layer names to layers 41 | self.layers = dict(inputs) 42 | # If true, the resulting variables are set as trainable 43 | self.trainable = trainable 44 | # Switch variable for dropout 45 | self.use_dropout = tf.placeholder_with_default(tf.constant(1.0), 46 | shape=[], 47 | name='use_dropout') 48 | self.setup(is_training, n_classes) 49 | 50 | def setup(self, is_training, n_classes): 51 | '''Construct the network. ''' 52 | raise NotImplementedError('Must be implemented by the subclass.') 53 | 54 | def load(self, data_path, session, ignore_missing=False): 55 | '''Load network weights. 56 | data_path: The path to the numpy-serialized network weights 57 | session: The current TensorFlow session 58 | ignore_missing: If true, serialized weights for missing layers are ignored. 59 | ''' 60 | data_dict = np.load(data_path).item() 61 | for op_name in data_dict: 62 | with tf.variable_scope(op_name, reuse=True): 63 | for param_name, data in data_dict[op_name].iteritems(): 64 | try: 65 | var = tf.get_variable(param_name) 66 | session.run(var.assign(data)) 67 | except ValueError: 68 | if not ignore_missing: 69 | raise 70 | 71 | def feed(self, *args): 72 | '''Set the input(s) for the next operation by replacing the terminal nodes. 73 | The arguments can be either layer names or the actual layers. 74 | ''' 75 | assert len(args) != 0 76 | self.terminals = [] 77 | for fed_layer in args: 78 | if isinstance(fed_layer, str): 79 | try: 80 | fed_layer = self.layers[fed_layer] 81 | except KeyError: 82 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 83 | self.terminals.append(fed_layer) 84 | return self 85 | 86 | def get_output(self): 87 | '''Returns the current network output.''' 88 | return self.terminals[-1] 89 | 90 | def get_unique_name(self, prefix): 91 | '''Returns an index-suffixed unique name for the given prefix. 92 | This is used for auto-generating layer names based on the type-prefix. 93 | ''' 94 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 95 | return '%s_%d' % (prefix, ident) 96 | 97 | def make_var(self, name, shape): 98 | '''Creates a new TensorFlow variable.''' 99 | return tf.get_variable(name, shape, trainable=self.trainable) 100 | 101 | def validate_padding(self, padding): 102 | '''Verifies that the padding is one of the supported ones.''' 103 | assert padding in ('SAME', 'VALID') 104 | 105 | @layer 106 | def conv(self, 107 | input, 108 | k_h, 109 | k_w, 110 | c_o, 111 | s_h, 112 | s_w, 113 | name, 114 | relu=True, 115 | padding=DEFAULT_PADDING, 116 | group=1, 117 | biased=True): 118 | # Verify that the padding is acceptable 119 | self.validate_padding(padding) 120 | # Get the number of channels in the input 121 | c_i = input.get_shape()[-1] 122 | # Verify that the grouping parameter is valid 123 | assert c_i % group == 0 124 | assert c_o % group == 0 125 | # Convolution for a given input and kernel 126 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 127 | with tf.variable_scope(name) as scope: 128 | kernel = self.make_var('weights', shape=[k_h, k_w, int(c_i) / group, c_o]) 129 | if group == 1: 130 | # This is the common-case. Convolve the input without any further complications. 
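# 'kernel' has shape [k_h, k_w, c_i / group, c_o], the HWIO filter layout
# expected by tf.nn.conv2d.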
131 | output = convolve(input, kernel) 132 | else: 133 | # Split the input into groups and then convolve each of them independently 134 | input_groups = tf.split(3, group, input) 135 | kernel_groups = tf.split(3, group, kernel) 136 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 137 | # Concatenate the groups 138 | output = tf.concat(3, output_groups) 139 | # Add the biases 140 | if biased: 141 | biases = self.make_var('biases', [c_o]) 142 | output = tf.nn.bias_add(output, biases) 143 | if relu: 144 | # ReLU non-linearity 145 | output = tf.nn.relu(output, name=scope.name) 146 | return output 147 | 148 | @layer 149 | def atrous_conv(self, 150 | input, 151 | k_h, 152 | k_w, 153 | c_o, 154 | dilation, 155 | name, 156 | relu=True, 157 | padding=DEFAULT_PADDING, 158 | group=1, 159 | biased=True): 160 | # Verify that the padding is acceptable 161 | self.validate_padding(padding) 162 | # Get the number of channels in the input 163 | c_i = input.get_shape()[-1] 164 | # Verify that the grouping parameter is valid 165 | assert c_i % group == 0 166 | assert c_o % group == 0 167 | # Convolution for a given input and kernel 168 | convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding) 169 | with tf.variable_scope(name) as scope: 170 | kernel = self.make_var('weights', shape=[k_h, k_w, int(c_i) / group, c_o]) 171 | if group == 1: 172 | # This is the common-case. Convolve the input without any further complications. 173 | output = convolve(input, kernel) 174 | else: 175 | # Split the input into groups and then convolve each of them independently 176 | input_groups = tf.split(3, group, input) 177 | kernel_groups = tf.split(3, group, kernel) 178 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 179 | # Concatenate the groups 180 | output = tf.concat(3, output_groups) 181 | # Add the biases 182 | if biased: 183 | biases = self.make_var('biases', [c_o]) 184 | output = tf.nn.bias_add(output, biases) 185 | if relu: 186 | # ReLU non-linearity 187 | output = tf.nn.relu(output, name=scope.name) 188 | return output 189 | 190 | @layer 191 | def relu(self, input, name): 192 | return tf.nn.relu(input, name=name) 193 | 194 | @layer 195 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 196 | self.validate_padding(padding) 197 | return tf.nn.max_pool(input, 198 | ksize=[1, k_h, k_w, 1], 199 | strides=[1, s_h, s_w, 1], 200 | padding=padding, 201 | name=name) 202 | 203 | @layer 204 | def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 205 | self.validate_padding(padding) 206 | return tf.nn.avg_pool(input, 207 | ksize=[1, k_h, k_w, 1], 208 | strides=[1, s_h, s_w, 1], 209 | padding=padding, 210 | name=name) 211 | 212 | @layer 213 | def lrn(self, input, radius, alpha, beta, name, bias=1.0): 214 | return tf.nn.local_response_normalization(input, 215 | depth_radius=radius, 216 | alpha=alpha, 217 | beta=beta, 218 | bias=bias, 219 | name=name) 220 | 221 | @layer 222 | def concat(self, inputs, axis, name): 223 | return tf.concat(concat_dim=axis, values=inputs, name=name) 224 | 225 | @layer 226 | def add(self, inputs, name): 227 | return tf.add_n(inputs, name=name) 228 | 229 | @layer 230 | def fc(self, input, num_out, name, relu=True): 231 | with tf.variable_scope(name) as scope: 232 | input_shape = input.get_shape() 233 | if input_shape.ndims == 4: 234 | # The input is spatial. Vectorize it first. 
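# For example (hypothetical shape), a (N, 7, 7, 512) feature map is flattened below to
# (N, 7*7*512) = (N, 25088) before the fully-connected multiply.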
235 | dim = 1 236 | for d in input_shape[1:].as_list(): 237 | dim *= d 238 | feed_in = tf.reshape(input, [-1, dim]) 239 | else: 240 | feed_in, dim = (input, input_shape[-1].value) 241 | weights = self.make_var('weights', shape=[dim, num_out]) 242 | biases = self.make_var('biases', [num_out]) 243 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 244 | fc = op(feed_in, weights, biases, name=scope.name) 245 | return fc 246 | 247 | @layer 248 | def softmax(self, input, name): 249 | input_shape = map(lambda v: v.value, input.get_shape()) 250 | if len(input_shape) > 2: 251 | # For certain models (like NiN), the singleton spatial dimensions 252 | # need to be explicitly squeezed, since they're not broadcast-able 253 | # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). 254 | if input_shape[1] == 1 and input_shape[2] == 1: 255 | input = tf.squeeze(input, squeeze_dims=[1, 2]) 256 | else: 257 | raise ValueError('Rank 2 tensor input expected for softmax!') 258 | return tf.nn.softmax(input, name) 259 | 260 | @layer 261 | def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True): 262 | with tf.variable_scope(name) as scope: 263 | output = slim.batch_norm( 264 | input, 265 | activation_fn=activation_fn, 266 | is_training=is_training, 267 | updates_collections=None, 268 | scale=scale, 269 | scope=scope) 270 | return output 271 | 272 | @layer 273 | def dropout(self, input, keep_prob, name): 274 | keep = 1 - self.use_dropout + (self.use_dropout * keep_prob) 275 | return tf.nn.dropout(input, keep, name=name) 276 | -------------------------------------------------------------------------------- /kaffe/tensorflow/transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..errors import KaffeError, print_stderr 4 | from ..graph import GraphBuilder, NodeMapper 5 | from ..layers import NodeKind 6 | from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser, 7 | BatchNormScaleBiasFuser, BatchNormPreprocessor, ParameterNamer) 8 | 9 | from . import network 10 | 11 | 12 | def get_padding_type(kernel_params, input_shape, output_shape): 13 | '''Translates Caffe's numeric padding to one of ('SAME', 'VALID'). 14 | Caffe supports arbitrary padding values, while TensorFlow only 15 | supports 'SAME' and 'VALID' modes. So, not all Caffe paddings 16 | can be translated to TensorFlow. There are some subtleties to 17 | how the padding edge-cases are handled. 
These are described here: 18 | https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto 19 | ''' 20 | k_h, k_w, s_h, s_w, p_h, p_w = kernel_params 21 | s_o_h = np.ceil(input_shape.height / float(s_h)) 22 | s_o_w = np.ceil(input_shape.width / float(s_w)) 23 | if (output_shape.height == s_o_h) and (output_shape.width == s_o_w): 24 | return 'SAME' 25 | v_o_h = np.ceil((input_shape.height - k_h + 1.0) / float(s_h)) 26 | v_o_w = np.ceil((input_shape.width - k_w + 1.0) / float(s_w)) 27 | if (output_shape.height == v_o_h) and (output_shape.width == v_o_w): 28 | return 'VALID' 29 | return None 30 | 31 | 32 | class TensorFlowNode(object): 33 | '''An intermediate representation for TensorFlow operations.''' 34 | 35 | def __init__(self, op, *args, **kwargs): 36 | # A string corresponding to the TensorFlow operation 37 | self.op = op 38 | # Positional arguments for the operation 39 | self.args = args 40 | # Keyword arguments for the operation 41 | self.kwargs = list(kwargs.items()) 42 | # The source Caffe node 43 | self.node = None 44 | 45 | def format(self, arg): 46 | '''Returns a string representation for the given value.''' 47 | return "'%s'" % arg if isinstance(arg, basestring) else str(arg) 48 | 49 | def pair(self, key, value): 50 | '''Returns key=formatted(value).''' 51 | return '%s=%s' % (key, self.format(value)) 52 | 53 | def emit(self): 54 | '''Emits the Python source for this node.''' 55 | # Format positional arguments 56 | args = map(self.format, self.args) 57 | # Format any keyword arguments 58 | if self.kwargs: 59 | args += [self.pair(k, v) for k, v in self.kwargs] 60 | # Set the node name 61 | args.append(self.pair('name', self.node.name)) 62 | args = ', '.join(args) 63 | return '%s(%s)' % (self.op, args) 64 | 65 | 66 | class MaybeActivated(object): 67 | 68 | def __init__(self, node, default=True): 69 | self.inject_kwargs = {} 70 | if node.metadata.get('relu', False) != default: 71 | self.inject_kwargs['relu'] = not default 72 | 73 | def __call__(self, *args, **kwargs): 74 | kwargs.update(self.inject_kwargs) 75 | return TensorFlowNode(*args, **kwargs) 76 | 77 | 78 | class TensorFlowMapper(NodeMapper): 79 | 80 | def get_kernel_params(self, node): 81 | kernel_params = node.layer.kernel_parameters 82 | input_shape = node.get_only_parent().output_shape 83 | padding = get_padding_type(kernel_params, input_shape, node.output_shape) 84 | # Only emit the padding if it's not the default value. 85 | padding = {'padding': padding} if padding != network.DEFAULT_PADDING else {} 86 | return (kernel_params, padding) 87 | 88 | def map_convolution(self, node): 89 | (kernel_params, kwargs) = self.get_kernel_params(node) 90 | h = kernel_params.kernel_h 91 | w = kernel_params.kernel_w 92 | c_o = node.output_shape[1] 93 | c_i = node.parents[0].output_shape[1] 94 | group = node.parameters.group 95 | if group != 1: 96 | kwargs['group'] = group 97 | if not node.parameters.bias_term: 98 | kwargs['biased'] = False 99 | assert kernel_params.kernel_h == h 100 | assert kernel_params.kernel_w == w 101 | return MaybeActivated(node)('conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o, 102 | kernel_params.stride_h, kernel_params.stride_w, **kwargs) 103 | 104 | def map_relu(self, node): 105 | return TensorFlowNode('relu') 106 | 107 | def map_pooling(self, node): 108 | pool_type = node.parameters.pool 109 | if pool_type == 0: 110 | pool_op = 'max_pool' 111 | elif pool_type == 1: 112 | pool_op = 'avg_pool' 113 | else: 114 | # Stochastic pooling, for instance. 
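# Caffe's PoolingParameter enum is MAX = 0, AVE = 1, STOCHASTIC = 2; stochastic pooling
# has no counterpart in this emitter, so it is rejected below.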
115 | raise KaffeError('Unsupported pooling type.') 116 | (kernel_params, padding) = self.get_kernel_params(node) 117 | return TensorFlowNode(pool_op, kernel_params.kernel_h, kernel_params.kernel_w, 118 | kernel_params.stride_h, kernel_params.stride_w, **padding) 119 | 120 | def map_inner_product(self, node): 121 | #TODO: Axis 122 | assert node.parameters.axis == 1 123 | #TODO: Unbiased 124 | assert node.parameters.bias_term == True 125 | return MaybeActivated(node)('fc', node.parameters.num_output) 126 | 127 | def map_softmax(self, node): 128 | return TensorFlowNode('softmax') 129 | 130 | def map_lrn(self, node): 131 | params = node.parameters 132 | # The window size must be an odd value. For a window 133 | # size of (2*n+1), TensorFlow defines depth_radius = n. 134 | assert params.local_size % 2 == 1 135 | # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow 136 | # just scales by alpha (as does Krizhevsky's paper). 137 | # We'll account for that here. 138 | alpha = params.alpha / float(params.local_size) 139 | return TensorFlowNode('lrn', int(params.local_size / 2), alpha, params.beta) 140 | 141 | def map_concat(self, node): 142 | axis = (2, 3, 1, 0)[node.parameters.axis] 143 | return TensorFlowNode('concat', axis) 144 | 145 | def map_dropout(self, node): 146 | return TensorFlowNode('dropout', node.parameters.dropout_ratio) 147 | 148 | def map_batch_norm(self, node): 149 | scale_offset = len(node.data) == 4 150 | kwargs = {'is_training': True} if scale_offset else {'is_training': True, 'scale': False} 151 | return MaybeActivated(node, default=False)('batch_normalization', **kwargs) 152 | 153 | def map_eltwise(self, node): 154 | operations = {0: 'multiply', 1: 'add', 2: 'max'} 155 | op_code = node.parameters.operation 156 | try: 157 | return TensorFlowNode(operations[op_code]) 158 | except KeyError: 159 | raise KaffeError('Unknown elementwise operation: {}'.format(op_code)) 160 | 161 | def commit(self, chains): 162 | return chains 163 | 164 | 165 | class TensorFlowEmitter(object): 166 | 167 | def __init__(self, tab=None): 168 | self.tab = tab or ' ' * 4 169 | self.prefix = '' 170 | 171 | def indent(self): 172 | self.prefix += self.tab 173 | 174 | def outdent(self): 175 | self.prefix = self.prefix[:-len(self.tab)] 176 | 177 | def statement(self, s): 178 | return self.prefix + s + '\n' 179 | 180 | def emit_imports(self): 181 | return self.statement('from kaffe.tensorflow import Network\n') 182 | 183 | def emit_class_def(self, name): 184 | return self.statement('class %s(Network):' % (name)) 185 | 186 | def emit_setup_def(self): 187 | return self.statement('def setup(self):') 188 | 189 | def emit_parents(self, chain): 190 | assert len(chain) 191 | s = '(self.feed(' 192 | sep = ', \n' + self.prefix + (' ' * len(s)) 193 | s += sep.join(["'%s'" % parent.name for parent in chain[0].node.parents]) 194 | return self.statement(s + ')') 195 | 196 | def emit_node(self, node): 197 | return self.statement(' ' * 5 + '.' 
+ node.emit()) 198 | 199 | def emit(self, name, chains): 200 | s = self.emit_imports() 201 | s += self.emit_class_def(name) 202 | self.indent() 203 | s += self.emit_setup_def() 204 | self.indent() 205 | blocks = [] 206 | for chain in chains: 207 | b = '' 208 | b += self.emit_parents(chain) 209 | for node in chain: 210 | b += self.emit_node(node) 211 | blocks.append(b[:-1] + ')') 212 | s = s + '\n\n'.join(blocks) 213 | return s 214 | 215 | 216 | class TensorFlowTransformer(object): 217 | 218 | def __init__(self, def_path, data_path, verbose=True, phase='test'): 219 | self.verbose = verbose 220 | self.phase = phase 221 | self.load(def_path, data_path, phase) 222 | self.params = None 223 | self.source = None 224 | 225 | def load(self, def_path, data_path, phase): 226 | # Build the graph 227 | graph = GraphBuilder(def_path, phase).build() 228 | 229 | if data_path is not None: 230 | # Load and associate learned parameters 231 | graph = DataInjector(def_path, data_path)(graph) 232 | 233 | # Transform the graph 234 | transformers = [ 235 | # Fuse split batch normalization layers 236 | BatchNormScaleBiasFuser(), 237 | 238 | # Fuse ReLUs 239 | # TODO: Move non-linearity application to layer wrapper, allowing 240 | # any arbitrary operation to be optionally activated. 241 | ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct, 242 | NodeKind.BatchNorm]), 243 | 244 | # Rename nodes 245 | # Slashes are used for scoping in TensorFlow. Replace slashes 246 | # in node names with underscores. 247 | # (Caffe's GoogLeNet implementation uses slashes) 248 | NodeRenamer(lambda node: node.name.replace('/', '_')) 249 | ] 250 | self.graph = graph.transformed(transformers) 251 | 252 | # Display the graph 253 | if self.verbose: 254 | print_stderr(self.graph) 255 | 256 | def transform_data(self): 257 | if self.params is None: 258 | transformers = [ 259 | 260 | # Reshape the parameters to TensorFlow's ordering 261 | DataReshaper({ 262 | # (c_o, c_i, h, w) -> (h, w, c_i, c_o) 263 | NodeKind.Convolution: (2, 3, 1, 0), 264 | 265 | # (c_o, c_i) -> (c_i, c_o) 266 | NodeKind.InnerProduct: (1, 0) 267 | }), 268 | 269 | # Pre-process batch normalization data 270 | BatchNormPreprocessor(), 271 | 272 | # Convert parameters to dictionaries 273 | ParameterNamer(), 274 | ] 275 | self.graph = self.graph.transformed(transformers) 276 | self.params = {node.name: node.data for node in self.graph.nodes if node.data} 277 | return self.params 278 | 279 | def transform_source(self): 280 | if self.source is None: 281 | mapper = TensorFlowMapper(self.graph) 282 | chains = mapper.map() 283 | emitter = TensorFlowEmitter() 284 | self.source = emitter.emit(self.graph.name, chains) 285 | return self.source 286 | -------------------------------------------------------------------------------- /kaffe/transformers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A collection of graph transforms. 3 | 4 | A transformer is a callable that accepts a graph and returns a transformed version. 5 | ''' 6 | 7 | import numpy as np 8 | 9 | from .caffe import get_caffe_resolver, has_pycaffe 10 | from .errors import KaffeError, print_stderr 11 | from .layers import NodeKind 12 | 13 | 14 | class DataInjector(object): 15 | ''' 16 | Associates parameters loaded from a .caffemodel file with their corresponding nodes. 
17 | ''' 18 | 19 | def __init__(self, def_path, data_path): 20 | # The .prototxt file defining the graph 21 | self.def_path = def_path 22 | # The .caffemodel file containing the learned parameters 23 | self.data_path = data_path 24 | # Set to true if the fallback protocol-buffer based backend was used 25 | self.did_use_pb = False 26 | # A list containing (layer name, parameters) tuples 27 | self.params = None 28 | # Load the parameters 29 | self.load() 30 | 31 | def load(self): 32 | if has_pycaffe(): 33 | self.load_using_caffe() 34 | else: 35 | self.load_using_pb() 36 | 37 | def load_using_caffe(self): 38 | caffe = get_caffe_resolver().caffe 39 | net = caffe.Net(self.def_path, self.data_path, caffe.TEST) 40 | data = lambda blob: blob.data 41 | self.params = [(k, map(data, v)) for k, v in net.params.items()] 42 | 43 | def load_using_pb(self): 44 | data = get_caffe_resolver().NetParameter() 45 | data.MergeFromString(open(self.data_path, 'rb').read()) 46 | pair = lambda layer: (layer.name, self.normalize_pb_data(layer)) 47 | layers = data.layers or data.layer 48 | self.params = [pair(layer) for layer in layers if layer.blobs] 49 | self.did_use_pb = True 50 | 51 | def normalize_pb_data(self, layer): 52 | transformed = [] 53 | for blob in layer.blobs: 54 | if len(blob.shape.dim): 55 | dims = blob.shape.dim 56 | c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims)) 57 | else: 58 | c_o = blob.num 59 | c_i = blob.channels 60 | h = blob.height 61 | w = blob.width 62 | data = np.array(blob.data, dtype=np.float32).reshape(c_o, c_i, h, w) 63 | transformed.append(data) 64 | return transformed 65 | 66 | def adjust_parameters(self, node, data): 67 | if not self.did_use_pb: 68 | return data 69 | # When using the protobuf-backend, each parameter initially has four dimensions. 70 | # In certain cases (like FC layers), we want to eliminate the singleton dimensions. 71 | # This implementation takes care of the common cases. However, it does leave the 72 | # potential for future issues. 73 | # The Caffe-backend does not suffer from this problem. 74 | data = list(data) 75 | squeeze_indices = [1] # Squeeze biases. 76 | if node.kind == NodeKind.InnerProduct: 77 | squeeze_indices.append(0) # Squeeze FC. 78 | for idx in squeeze_indices: 79 | data[idx] = np.squeeze(data[idx]) 80 | return data 81 | 82 | def __call__(self, graph): 83 | for layer_name, data in self.params: 84 | if layer_name in graph: 85 | node = graph.get_node(layer_name) 86 | node.data = self.adjust_parameters(node, data) 87 | else: 88 | print_stderr('Ignoring parameters for non-existent layer: %s' % layer_name) 89 | return graph 90 | 91 | 92 | class DataReshaper(object): 93 | 94 | def __init__(self, mapping, replace=True): 95 | # A dictionary mapping NodeKind to the transposed order. 96 | self.mapping = mapping 97 | # The node kinds eligible for reshaping 98 | self.reshaped_node_types = self.mapping.keys() 99 | # If true, the reshaped data will replace the old one. 100 | # Otherwise, it's set to the reshaped_data attribute. 
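# For reference, the TensorFlow transformer in this repository passes
#   DataReshaper({NodeKind.Convolution: (2, 3, 1, 0),   # (c_o, c_i, h, w) -> (h, w, c_i, c_o)
#                 NodeKind.InnerProduct: (1, 0)})       # (c_o, c_i) -> (c_i, c_o)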
101 | self.replace = replace 102 | 103 | def has_spatial_parent(self, node): 104 | try: 105 | parent = node.get_only_parent() 106 | s = parent.output_shape 107 | return s.height > 1 or s.width > 1 108 | except KaffeError: 109 | return False 110 | 111 | def map(self, node_kind): 112 | try: 113 | return self.mapping[node_kind] 114 | except KeyError: 115 | raise KaffeError('Ordering not found for node kind: {}'.format(node_kind)) 116 | 117 | def __call__(self, graph): 118 | for node in graph.nodes: 119 | if node.data is None: 120 | continue 121 | if node.kind not in self.reshaped_node_types: 122 | # Check for 2+ dimensional data 123 | if any(len(tensor.shape) > 1 for tensor in node.data): 124 | print_stderr('Warning: parmaters not reshaped for node: {}'.format(node)) 125 | continue 126 | transpose_order = self.map(node.kind) 127 | weights = node.data[0] 128 | if (node.kind == NodeKind.InnerProduct) and self.has_spatial_parent(node): 129 | # The FC layer connected to the spatial layer needs to be 130 | # re-wired to match the new spatial ordering. 131 | in_shape = node.get_only_parent().output_shape 132 | fc_shape = weights.shape 133 | output_channels = fc_shape[0] 134 | weights = weights.reshape((output_channels, in_shape.channels, in_shape.height, 135 | in_shape.width)) 136 | weights = weights.transpose(self.map(NodeKind.Convolution)) 137 | node.reshaped_data = weights.reshape(fc_shape[transpose_order[0]], 138 | fc_shape[transpose_order[1]]) 139 | else: 140 | node.reshaped_data = weights.transpose(transpose_order) 141 | 142 | if self.replace: 143 | for node in graph.nodes: 144 | if hasattr(node, 'reshaped_data'): 145 | # Set the weights 146 | node.data[0] = node.reshaped_data 147 | del node.reshaped_data 148 | return graph 149 | 150 | 151 | class SubNodeFuser(object): 152 | ''' 153 | An abstract helper for merging a single-child with its single-parent. 154 | ''' 155 | 156 | def __call__(self, graph): 157 | nodes = graph.nodes 158 | fused_nodes = [] 159 | for node in nodes: 160 | if len(node.parents) != 1: 161 | # We're only fusing nodes with single parents 162 | continue 163 | parent = node.get_only_parent() 164 | if len(parent.children) != 1: 165 | # We can only fuse a node if its parent's 166 | # value isn't used by any other node. 167 | continue 168 | if not self.is_eligible_pair(parent, node): 169 | continue 170 | # Rewrite the fused node's children to its parent. 171 | for child in node.children: 172 | child.parents.remove(node) 173 | parent.add_child(child) 174 | # Disconnect the fused node from the graph. 175 | parent.children.remove(node) 176 | fused_nodes.append(node) 177 | # Let the sub-class merge the fused node in any arbitrary way. 178 | self.merge(parent, node) 179 | transformed_nodes = [node for node in nodes if node not in fused_nodes] 180 | return graph.replaced(transformed_nodes) 181 | 182 | def is_eligible_pair(self, parent, child): 183 | '''Returns true if this parent/child pair is eligible for fusion.''' 184 | raise NotImplementedError('Must be implemented by subclass.') 185 | 186 | def merge(self, parent, child): 187 | '''Merge the child node into the parent.''' 188 | raise NotImplementedError('Must be implemented by subclass') 189 | 190 | 191 | class ReLUFuser(SubNodeFuser): 192 | ''' 193 | Fuses rectified linear units with their parent nodes. 194 | ''' 195 | 196 | def __init__(self, allowed_parent_types=None): 197 | # Fuse ReLUs when the parent node is one of the given types. 198 | # If None, all node types are eligible. 
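# For example, the TensorFlow transformer constructs
#   ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct, NodeKind.BatchNorm])
# so a trailing ReLU is folded into its producer and applied inside that layer instead of
# being emitted as a separate node.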
199 | self.allowed_parent_types = allowed_parent_types 200 | 201 | def is_eligible_pair(self, parent, child): 202 | return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and 203 | child.kind == NodeKind.ReLU) 204 | 205 | def merge(self, parent, _): 206 | parent.metadata['relu'] = True 207 | 208 | 209 | class BatchNormScaleBiasFuser(SubNodeFuser): 210 | ''' 211 | The original batch normalization paper includes two learned 212 | parameters: a scaling factor \gamma and a bias \beta. 213 | Caffe's implementation does not include these two. However, it is commonly 214 | replicated by adding a scaling+bias layer immidiately after the batch norm. 215 | 216 | This fuser merges the scaling+bias layer with the batch norm. 217 | ''' 218 | 219 | def is_eligible_pair(self, parent, child): 220 | return (parent.kind == NodeKind.BatchNorm and child.kind == NodeKind.Scale and 221 | child.parameters.axis == 1 and child.parameters.bias_term == True) 222 | 223 | def merge(self, parent, child): 224 | parent.scale_bias_node = child 225 | 226 | 227 | class BatchNormPreprocessor(object): 228 | ''' 229 | Prescale batch normalization parameters. 230 | Concatenate gamma (scale) and beta (bias) terms if set. 231 | ''' 232 | 233 | def __call__(self, graph): 234 | for node in graph.nodes: 235 | if node.kind != NodeKind.BatchNorm: 236 | continue 237 | assert node.data is not None 238 | assert len(node.data) == 3 239 | mean, variance, scale = node.data 240 | # Prescale the stats 241 | scaling_factor = 1.0 / scale if scale != 0 else 0 242 | mean *= scaling_factor 243 | variance *= scaling_factor 244 | # Replace with the updated values 245 | node.data = [mean, variance] 246 | if hasattr(node, 'scale_bias_node'): 247 | # Include the scale and bias terms 248 | gamma, beta = node.scale_bias_node.data 249 | node.data += [gamma, beta] 250 | return graph 251 | 252 | 253 | class NodeRenamer(object): 254 | ''' 255 | Renames nodes in the graph using a given unary function that 256 | accepts a node and returns its new name. 257 | ''' 258 | 259 | def __init__(self, renamer): 260 | self.renamer = renamer 261 | 262 | def __call__(self, graph): 263 | for node in graph.nodes: 264 | node.name = self.renamer(node) 265 | return graph 266 | 267 | 268 | class ParameterNamer(object): 269 | ''' 270 | Convert layer data arrays to a dictionary mapping parameter names to their values. 
271 | ''' 272 | 273 | def __call__(self, graph): 274 | for node in graph.nodes: 275 | if node.data is None: 276 | continue 277 | if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct): 278 | names = ('weights',) 279 | if node.parameters.bias_term: 280 | names += ('biases',) 281 | elif node.kind == NodeKind.BatchNorm: 282 | names = ('moving_mean', 'moving_variance') 283 | if len(node.data) == 4: 284 | names += ('gamma', 'beta') 285 | else: 286 | print_stderr('WARNING: Unhandled parameters: {}'.format(node.kind)) 287 | continue 288 | assert len(names) == len(node.data) 289 | node.data = dict(zip(names, node.data)) 290 | return graph 291 | -------------------------------------------------------------------------------- /train_JPPNet-s2.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import time 4 | import tensorflow as tf 5 | import numpy as np 6 | import random 7 | from utils import * 8 | from LIP_model import * 9 | 10 | # Set gpus 11 | gpus = [0] # Here I set CUDA to only see one GPU 12 | os.environ["CUDA_VISIBLE_DEVICES"]=','.join([str(i) for i in gpus]) 13 | num_gpus = len(gpus) # number of GPUs to use 14 | 15 | ### parameters setting 16 | N_CLASSES = 20 17 | INPUT_SIZE = (384, 384) 18 | BATCH_SIZE = 1 19 | BATCH_I = 1 20 | SHUFFLE = True 21 | RANDOM_SCALE = True 22 | RANDOM_MIRROR = True 23 | LEARNING_RATE = 1e-4 24 | MOMENTUM = 0.9 25 | POWER = 0.9 26 | NUM_STEPS = 7616 * 35 + 1 27 | SAVE_PRED_EVERY = 7616 28 | p_Weight = 1 29 | s_Weight = 1 30 | DATA_DIR = './datasets/lip' 31 | LIST_PATH = './datasets/lip/list/train_rev.txt' 32 | DATA_ID_LIST = './datasets/lip/list/train_id.txt' 33 | SNAPSHOT_DIR = './checkpoint/JPPNet-s2' 34 | LOG_DIR = './logs/JPPNet-s2' 35 | 36 | 37 | def main(): 38 | RANDOM_SEED = random.randint(1000, 9999) 39 | tf.set_random_seed(RANDOM_SEED) 40 | 41 | # Create queue coordinator. 42 | coord = tf.train.Coordinator() 43 | h, w = INPUT_SIZE 44 | # Load reader. 45 | with tf.name_scope("create_inputs"): 46 | reader = LIPReader(DATA_DIR, LIST_PATH, DATA_ID_LIST, INPUT_SIZE, RANDOM_SCALE, RANDOM_MIRROR, SHUFFLE, coord) 47 | image_batch, label_batch, heatmap_batch = reader.dequeue(BATCH_SIZE) 48 | image_batch075 = tf.image.resize_images(image_batch, [int(h * 0.75), int(w * 0.75)]) 49 | image_batch050 = tf.image.resize_images(image_batch, [int(h * 0.5), int(w * 0.5)]) 50 | heatmap_batch = tf.scalar_mul(1.0/255, heatmap_batch) 51 | 52 | tower_grads = [] 53 | reuse1 = False 54 | reuse2 = False 55 | # Define loss and optimisation parameters. 56 | base_lr = tf.constant(LEARNING_RATE) 57 | step_ph = tf.placeholder(dtype=tf.float32, shape=()) 58 | learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / NUM_STEPS), POWER)) 59 | optim = tf.train.MomentumOptimizer(learning_rate, MOMENTUM) 60 | 61 | for i in xrange (num_gpus): 62 | with tf.device('/gpu:%d' % i): 63 | with tf.name_scope('Tower_%d' % (i)) as scope: 64 | if i == 0: 65 | reuse1 = False 66 | reuse2 = True 67 | else: 68 | reuse1 = True 69 | reuse2 = True 70 | next_image = image_batch[i*BATCH_I:(i+1)*BATCH_I,:] 71 | next_image075 = image_batch075[i*BATCH_I:(i+1)*BATCH_I,:] 72 | next_image050 = image_batch050[i*BATCH_I:(i+1)*BATCH_I,:] 73 | next_heatmap = heatmap_batch[i*BATCH_I:(i+1)*BATCH_I,:] 74 | next_label = label_batch[i*BATCH_I:(i+1)*BATCH_I,:] 75 | 76 | # Create network. 
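# The backbone is instantiated at three input scales (1.0, 0.75 and 0.5); reuse1/reuse2
# control tf.variable_scope reuse so that all scales, and all GPU towers, share one set of weights.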
77 | with tf.variable_scope('', reuse=reuse1): 78 | net_100 = JPPNetModel({'data': next_image}, is_training=False, n_classes=N_CLASSES) 79 | with tf.variable_scope('', reuse=reuse2): 80 | net_075 = JPPNetModel({'data': next_image075}, is_training=False, n_classes=N_CLASSES) 81 | with tf.variable_scope('', reuse=reuse2): 82 | net_050 = JPPNetModel({'data': next_image050}, is_training=False, n_classes=N_CLASSES) 83 | 84 | # parsing net 85 | parsing_fea1_100 = net_100.layers['res5d_branch2b_parsing'] 86 | parsing_fea1_075 = net_075.layers['res5d_branch2b_parsing'] 87 | parsing_fea1_050 = net_050.layers['res5d_branch2b_parsing'] 88 | 89 | parsing_out1_100 = net_100.layers['fc1_human'] 90 | parsing_out1_075 = net_075.layers['fc1_human'] 91 | parsing_out1_050 = net_050.layers['fc1_human'] 92 | # pose net 93 | resnet_fea_100 = net_100.layers['res4b22_relu'] 94 | resnet_fea_075 = net_075.layers['res4b22_relu'] 95 | resnet_fea_050 = net_050.layers['res4b22_relu'] 96 | 97 | with tf.variable_scope('', reuse=reuse1): 98 | pose_out1_100, pose_fea1_100 = pose_net(resnet_fea_100, 'fc1_pose') 99 | pose_out2_100, pose_fea2_100 = pose_refine(pose_out1_100, parsing_out1_100, pose_fea1_100, name='fc2_pose') 100 | parsing_out2_100, parsing_fea2_100 = parsing_refine(parsing_out1_100, pose_out1_100, parsing_fea1_100, name='fc2_parsing') 101 | parsing_out3_100, parsing_fea3_100 = parsing_refine(parsing_out2_100, pose_out2_100, parsing_fea2_100, name='fc3_parsing') 102 | pose_out3_100, pose_fea3_100 = pose_refine(pose_out2_100, parsing_out2_100, pose_fea2_100, name='fc3_pose') 103 | 104 | with tf.variable_scope('', reuse=reuse2): 105 | pose_out1_075, pose_fea1_075 = pose_net(resnet_fea_075, 'fc1_pose') 106 | pose_out2_075, pose_fea2_075 = pose_refine(pose_out1_075, parsing_out1_075, pose_fea1_075, name='fc2_pose') 107 | parsing_out2_075, parsing_fea2_075 = parsing_refine(parsing_out1_075, pose_out1_075, parsing_fea1_075, name='fc2_parsing') 108 | parsing_out3_075, parsing_fea3_075 = parsing_refine(parsing_out2_075, pose_out2_075, parsing_fea2_075, name='fc3_parsing') 109 | pose_out3_075, pose_fea3_075 = pose_refine(pose_out2_075, parsing_out2_075, pose_fea2_075, name='fc3_pose') 110 | 111 | with tf.variable_scope('', reuse=reuse2): 112 | pose_out1_050, pose_fea1_050 = pose_net(resnet_fea_050, 'fc1_pose') 113 | pose_out2_050, pose_fea2_050 = pose_refine(pose_out1_050, parsing_out1_050, pose_fea1_050, name='fc2_pose') 114 | parsing_out2_050, parsing_fea2_050 = parsing_refine(parsing_out1_050, pose_out1_050, parsing_fea1_050, name='fc2_parsing') 115 | parsing_out3_050, parsing_fea3_050 = parsing_refine(parsing_out2_050, pose_out2_050, parsing_fea2_050, name='fc3_parsing') 116 | pose_out3_050, pose_fea3_050 = pose_refine(pose_out2_050, parsing_out2_050, pose_fea2_050, name='fc3_pose') 117 | 118 | # combine resize 119 | parsing_out1 = tf.reduce_mean(tf.stack([parsing_out1_100, 120 | tf.image.resize_images(parsing_out1_075, tf.shape(parsing_out1_100)[1:3,]), 121 | tf.image.resize_images(parsing_out1_050, tf.shape(parsing_out1_100)[1:3,])]), axis=0) 122 | parsing_out2 = tf.reduce_mean(tf.stack([parsing_out2_100, 123 | tf.image.resize_images(parsing_out2_075, tf.shape(parsing_out2_100)[1:3,]), 124 | tf.image.resize_images(parsing_out2_050, tf.shape(parsing_out2_100)[1:3,])]), axis=0) 125 | parsing_out3 = tf.reduce_mean(tf.stack([parsing_out3_100, 126 | tf.image.resize_images(parsing_out3_075, tf.shape(parsing_out3_100)[1:3,]), 127 | tf.image.resize_images(parsing_out3_050, tf.shape(parsing_out3_100)[1:3,])]), axis=0) 128 
| pose_out1 = tf.reduce_mean(tf.stack([pose_out1_100, 129 | tf.image.resize_nearest_neighbor(pose_out1_075, tf.shape(pose_out1_100)[1:3,]), 130 | tf.image.resize_nearest_neighbor(pose_out1_050, tf.shape(pose_out1_100)[1:3,])]), axis=0) 131 | pose_out2 = tf.reduce_mean(tf.stack([pose_out2_100, 132 | tf.image.resize_nearest_neighbor(pose_out2_075, tf.shape(pose_out2_100)[1:3,]), 133 | tf.image.resize_nearest_neighbor(pose_out2_050, tf.shape(pose_out2_100)[1:3,])]), axis=0) 134 | pose_out3 = tf.reduce_mean(tf.stack([pose_out3_100, 135 | tf.image.resize_nearest_neighbor(pose_out3_075, tf.shape(pose_out3_100)[1:3,]), 136 | tf.image.resize_nearest_neighbor(pose_out3_050, tf.shape(pose_out3_100)[1:3,])]), axis=0) 137 | 138 | ### Predictions: ignoring all predictions with labels greater or equal than n_classes 139 | raw_prediction_p1 = tf.reshape(parsing_out1, [-1, N_CLASSES]) 140 | raw_prediction_p1_100 = tf.reshape(parsing_out1_100, [-1, N_CLASSES]) 141 | raw_prediction_p1_075 = tf.reshape(parsing_out1_075, [-1, N_CLASSES]) 142 | raw_prediction_p1_050 = tf.reshape(parsing_out1_050, [-1, N_CLASSES]) 143 | 144 | raw_prediction_p2 = tf.reshape(parsing_out2, [-1, N_CLASSES]) 145 | raw_prediction_p2_100 = tf.reshape(parsing_out2_100, [-1, N_CLASSES]) 146 | raw_prediction_p2_075 = tf.reshape(parsing_out2_075, [-1, N_CLASSES]) 147 | raw_prediction_p2_050 = tf.reshape(parsing_out2_050, [-1, N_CLASSES]) 148 | 149 | raw_prediction_p3 = tf.reshape(parsing_out3, [-1, N_CLASSES]) 150 | raw_prediction_p3_100 = tf.reshape(parsing_out3_100, [-1, N_CLASSES]) 151 | raw_prediction_p3_075 = tf.reshape(parsing_out3_075, [-1, N_CLASSES]) 152 | raw_prediction_p3_050 = tf.reshape(parsing_out3_050, [-1, N_CLASSES]) 153 | 154 | label_proc = prepare_label(next_label, tf.stack(parsing_out1.get_shape()[1:3]), one_hot=False) # [batch_size, h, w] 155 | label_proc075 = prepare_label(next_label, tf.stack(parsing_out1_075.get_shape()[1:3]), one_hot=False) 156 | label_proc050 = prepare_label(next_label, tf.stack(parsing_out1_050.get_shape()[1:3]), one_hot=False) 157 | 158 | raw_gt = tf.reshape(label_proc, [-1,]) 159 | raw_gt075 = tf.reshape(label_proc075, [-1,]) 160 | raw_gt050 = tf.reshape(label_proc050, [-1,]) 161 | 162 | indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, N_CLASSES - 1)), 1) 163 | indices075 = tf.squeeze(tf.where(tf.less_equal(raw_gt075, N_CLASSES - 1)), 1) 164 | indices050 = tf.squeeze(tf.where(tf.less_equal(raw_gt050, N_CLASSES - 1)), 1) 165 | 166 | gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) 167 | gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32) 168 | gt050 = tf.cast(tf.gather(raw_gt050, indices050), tf.int32) 169 | 170 | prediction_p1 = tf.gather(raw_prediction_p1, indices) 171 | prediction_p1_100 = tf.gather(raw_prediction_p1_100, indices) 172 | prediction_p1_075 = tf.gather(raw_prediction_p1_075, indices075) 173 | prediction_p1_050 = tf.gather(raw_prediction_p1_050, indices050) 174 | 175 | prediction_p2 = tf.gather(raw_prediction_p2, indices) 176 | prediction_p2_100 = tf.gather(raw_prediction_p2_100, indices) 177 | prediction_p2_075 = tf.gather(raw_prediction_p2_075, indices075) 178 | prediction_p2_050 = tf.gather(raw_prediction_p2_050, indices050) 179 | 180 | prediction_p3 = tf.gather(raw_prediction_p3, indices) 181 | prediction_p3_100 = tf.gather(raw_prediction_p3_100, indices) 182 | prediction_p3_075 = tf.gather(raw_prediction_p3_075, indices075) 183 | prediction_p3_050 = tf.gather(raw_prediction_p3_050, indices050) 184 | 185 | next_heatmap075 = 
tf.image.resize_nearest_neighbor(next_heatmap, pose_out1_075.get_shape()[1:3]) 186 | next_heatmap050 = tf.image.resize_nearest_neighbor(next_heatmap, pose_out1_050.get_shape()[1:3]) 187 | 188 | ### Pixel-wise softmax loss. 189 | loss_p1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p1, labels=gt)) 190 | loss_p1_100 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p1_100, labels=gt)) 191 | loss_p1_075 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p1_075, labels=gt075)) 192 | loss_p1_050 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p1_050, labels=gt050)) 193 | 194 | loss_p2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p2, labels=gt)) 195 | loss_p2_100 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p2_100, labels=gt)) 196 | loss_p2_075 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p2_075, labels=gt075)) 197 | loss_p2_050 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p2_050, labels=gt050)) 198 | 199 | loss_p3 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p3, labels=gt)) 200 | loss_p3_100 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p3_100, labels=gt)) 201 | loss_p3_075 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p3_075, labels=gt075)) 202 | loss_p3_050 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p3_050, labels=gt050)) 203 | 204 | loss_s1 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out1)), [1, 2, 3]))) 205 | loss_s1_100 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out1_100)), [1, 2, 3]))) 206 | loss_s1_075 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap075, pose_out1_075)), [1, 2, 3]))) 207 | loss_s1_050 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap050, pose_out1_050)), [1, 2, 3]))) 208 | 209 | loss_s2 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out2)), [1, 2, 3]))) 210 | loss_s2_100 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out2_100)), [1, 2, 3]))) 211 | loss_s2_075 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap075, pose_out2_075)), [1, 2, 3]))) 212 | loss_s2_050 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap050, pose_out2_050)), [1, 2, 3]))) 213 | 214 | loss_s3 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out3)), [1, 2, 3]))) 215 | loss_s3_100 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out3_100)), [1, 2, 3]))) 216 | loss_s3_075 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap075, pose_out3_075)), [1, 2, 3]))) 217 | loss_s3_050 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap050, pose_out3_050)), [1, 2, 3]))) 218 | 219 | loss_parsing = loss_p1 + loss_p1_100 + loss_p1_075 + loss_p1_050 + loss_p2 + loss_p2_100 + loss_p2_075 + loss_p2_050 + loss_p3 + loss_p3_100 + loss_p3_075 + loss_p3_050 220 | loss_pose = loss_s1 + loss_s1_100 + loss_s1_075 + loss_s1_050 + loss_s2 + loss_s2_100 + loss_s2_075 + loss_s2_050 + loss_s3 + loss_s3_100 + loss_s3_075 + loss_s3_050 221 | reduced_loss = loss_pose * s_Weight + 
loss_parsing * p_Weight 222 | 223 | trainable_variable = tf.trainable_variables() 224 | grads = optim.compute_gradients(reduced_loss, var_list=trainable_variable) 225 | 226 | tower_grads.append(grads) 227 | 228 | tf.add_to_collection('loss_p1', loss_p1) 229 | tf.add_to_collection('loss_p2', loss_p2) 230 | tf.add_to_collection('loss_p3', loss_p3) 231 | tf.add_to_collection('loss_s1', loss_s1) 232 | tf.add_to_collection('loss_s2', loss_s2) 233 | tf.add_to_collection('loss_s3', loss_s3) 234 | tf.add_to_collection('reduced_loss', reduced_loss) 235 | 236 | # Average the gradients 237 | grads_ave = average_gradients(tower_grads) 238 | # apply the gradients with our optimizers 239 | train_op = optim.apply_gradients(grads_ave) 240 | 241 | loss_p1_ave = tf.reduce_mean(tf.get_collection('loss_p1')) 242 | loss_p2_ave = tf.reduce_mean(tf.get_collection('loss_p2')) 243 | loss_p3_ave = tf.reduce_mean(tf.get_collection('loss_p3')) 244 | loss_s1_ave = tf.reduce_mean(tf.get_collection('loss_s1')) 245 | loss_s2_ave = tf.reduce_mean(tf.get_collection('loss_s2')) 246 | loss_s3_ave = tf.reduce_mean(tf.get_collection('loss_s3')) 247 | loss_ave = tf.reduce_mean(tf.get_collection('reduced_loss')) 248 | 249 | loss_summary_p1 = tf.summary.scalar("loss_p1_ave", loss_p1_ave) 250 | loss_summary_p2 = tf.summary.scalar("loss_p2_ave", loss_p2_ave) 251 | loss_summary_p3 = tf.summary.scalar("loss_p3_ave", loss_p3_ave) 252 | loss_summary_s1 = tf.summary.scalar("loss_s1_ave", loss_s1_ave) 253 | loss_summary_s2 = tf.summary.scalar("loss_s2_ave", loss_s2_ave) 254 | loss_summary_s3 = tf.summary.scalar("loss_s3_ave", loss_s3_ave) 255 | loss_summary_ave = tf.summary.scalar("loss_ave", loss_ave) 256 | loss_summary = tf.summary.merge([loss_summary_ave, loss_summary_s1, loss_summary_s2, loss_summary_s3, loss_summary_p1, loss_summary_p2, loss_summary_p3]) 257 | summary_writer = tf.summary.FileWriter(LOG_DIR, graph=tf.get_default_graph()) 258 | 259 | # Set up tf session and initialize variables. 260 | config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=False) 261 | config.gpu_options.allow_growth = True 262 | sess = tf.Session(config=config) 263 | init = tf.global_variables_initializer() 264 | sess.run(init) 265 | 266 | # Saver for storing checkpoints of the model. 267 | all_saver_var = tf.global_variables() 268 | restore_var = all_saver_var #[v for v in all_saver_var if 'pose' not in v.name and 'parsing' not in v.name] 269 | saver = tf.train.Saver(var_list=all_saver_var, max_to_keep=50) 270 | loader = tf.train.Saver(var_list=restore_var) 271 | 272 | if load(loader, sess, SNAPSHOT_DIR): 273 | print(" [*] Load SUCCESS") 274 | else: 275 | print(" [!] Load failed...") 276 | 277 | # Start queue threads. 278 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 279 | 280 | # Iterate over training steps. 281 | for step in range(NUM_STEPS): 282 | start_time = time.time() 283 | loss_value = 0 284 | feed_dict = { step_ph : step } 285 | 286 | # Apply gradients. 287 | summary, loss_value, _ = sess.run([loss_summary, reduced_loss, train_op], feed_dict=feed_dict) 288 | summary_writer.add_summary(summary, step) 289 | if step % SAVE_PRED_EVERY == 0: 290 | save(saver, sess, SNAPSHOT_DIR, step) 291 | 292 | duration = time.time() - start_time 293 | print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration)) 294 | coord.request_stop() 295 | coord.join(threads) 296 | 297 | def average_gradients(tower_grads): 298 | """Calculate the average gradient for each shared variable across all towers. 
299 | Note that this function provides a synchronization point across all towers. 300 | Args: 301 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 302 | is over individual gradients. The inner list is over the gradient 303 | calculation for each tower. 304 | Returns: 305 | List of pairs of (gradient, variable) where the gradient has been averaged 306 | across all towers. 307 | """ 308 | average_grads = [] 309 | for grad_and_vars in zip(*tower_grads): 310 | # Note that each grad_and_vars looks like the following: 311 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 312 | grads = [] 313 | for g, _ in grad_and_vars: 314 | # Add 0 dimension to the gradients to represent the tower. 315 | expanded_g = tf.expand_dims(g, 0) 316 | 317 | # Append on a 'tower' dimension which we will average over below. 318 | grads.append(expanded_g) 319 | 320 | # Average over the 'tower' dimension. 321 | grad = tf.concat(axis=0, values=grads) 322 | grad = tf.reduce_mean(grad, 0) 323 | 324 | # Keep in mind that the Variables are redundant because they are shared 325 | # across towers. So .. we will just return the first tower's pointer to 326 | # the Variable. 327 | v = grad_and_vars[0][1] 328 | grad_and_var = (grad, v) 329 | average_grads.append(grad_and_var) 330 | return average_grads 331 | 332 | if __name__ == '__main__': 333 | main() 334 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import JPPNetModel 2 | from .image_reader import ImageReader 3 | from .utils import decode_labels, inv_preprocess, prepare_label, save, load 4 | from .ops import conv2d, max_pool, linear 5 | from .lip_reader import LIPReader -------------------------------------------------------------------------------- /utils/image_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | IGNORE_LABEL = 255 7 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 8 | 9 | def image_scaling(img, label): 10 | """ 11 | Randomly scales the images between 0.5 to 1.5 times the original size. 12 | 13 | Args: 14 | img: Training image to scale. 15 | label: Segmentation mask to scale. 16 | """ 17 | 18 | scale = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None) 19 | h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale)) 20 | w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale)) 21 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1]) 22 | img = tf.image.resize_images(img, new_shape) 23 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 24 | label = tf.squeeze(label, squeeze_dims=[0]) 25 | 26 | return img, label 27 | 28 | def image_mirroring(img, label): 29 | """ 30 | Randomly mirrors the images. 31 | 32 | Args: 33 | img: Training image to mirror. 34 | label: Segmentation mask to mirror. 35 | """ 36 | 37 | distort_left_right_random = tf.random_uniform([1], 0, 1.0, dtype=tf.float32)[0] 38 | mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5) 39 | img = tf.reverse(img, mirror) 40 | reversed_label = tf.reverse(label, mirror) 41 | 42 | return img, reversed_label 43 | 44 | def random_crop_and_pad_image_and_labels(image, label, crop_h, crop_w, ignore_label=255): 45 | """ 46 | Randomly crop and pads the input images. 
47 | 48 | Args: 49 | image: Training image to crop/ pad. 50 | label: Segmentation mask to crop/ pad. 51 | crop_h: Height of cropped segment. 52 | crop_w: Width of cropped segment. 53 | ignore_label: Label to ignore during the training. 54 | """ 55 | 56 | label = tf.cast(label, dtype=tf.float32) 57 | label = label - ignore_label # Needs to be subtracted and later added due to 0 padding. 58 | combined = tf.concat([image, label], 2) 59 | image_shape = tf.shape(image) 60 | combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0, tf.maximum(crop_h, image_shape[0]), tf.maximum(crop_w, image_shape[1])) 61 | 62 | last_image_dim = tf.shape(image)[-1] 63 | last_label_dim = tf.shape(label)[-1] 64 | combined_crop = tf.random_crop(combined_pad, [crop_h,crop_w,4]) 65 | img_crop = combined_crop[:, :, :last_image_dim] 66 | label_crop = combined_crop[:, :, last_image_dim:] 67 | label_crop = label_crop + ignore_label 68 | label_crop = tf.cast(label_crop, dtype=tf.uint8) 69 | 70 | # Set static shape so that tensorflow knows shape at compile time. 71 | img_crop.set_shape((crop_h, crop_w, 3)) 72 | label_crop.set_shape((crop_h,crop_w, 1)) 73 | return img_crop, label_crop 74 | 75 | def read_labeled_image_list(data_dir, data_list): 76 | """Reads txt file containing paths to images and ground truth masks. 77 | 78 | Args: 79 | data_dir: path to the directory with images and masks. 80 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 81 | 82 | Returns: 83 | Two lists with all file names for images and masks, respectively. 84 | """ 85 | f = open(data_list, 'r') 86 | images = [] 87 | for line in f: 88 | try: 89 | image, mask = line.strip("\n").split(' ') 90 | except ValueError: # Adhoc for test. 91 | image = line.strip("\n") 92 | images.append(data_dir + image) 93 | return images 94 | 95 | def read_images_from_disk(input_queue, input_size, random_scale, random_mirror): # optional pre-processing arguments 96 | """Read one image and its corresponding mask with optional pre-processing. 97 | 98 | Args: 99 | input_queue: tf queue with paths to the image and its mask. 100 | input_size: a tuple with (height, width) values. 101 | If not given, return images of original size. 102 | random_scale: whether to randomly scale the images prior 103 | to random crop. 104 | random_mirror: whether to randomly mirror the images prior 105 | to random crop. 106 | 107 | Returns: 108 | Two tensors: the decoded image and its mask. 109 | """ 110 | 111 | img_contents = tf.read_file(input_queue[0]) 112 | 113 | img = tf.image.decode_jpeg(img_contents, channels=3) 114 | img_r, img_g, img_b = tf.split(value=img, num_or_size_splits=3, axis=2) 115 | img = tf.cast(tf.concat([img_b, img_g, img_r], 2), dtype=tf.float32) 116 | # Extract mean. 117 | img -= IMG_MEAN 118 | 119 | return img 120 | 121 | class ImageReader(object): 122 | '''Generic ImageReader which reads images and corresponding segmentation 123 | masks from the disk, and enqueues them into a TensorFlow queue. 124 | ''' 125 | 126 | def __init__(self, data_dir, data_list, input_size, random_scale, 127 | random_mirror, coord): 128 | '''Initialise an ImageReader. 129 | 130 | Args: 131 | data_dir: path to the directory with images and masks. 132 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 133 | input_size: a tuple with (height, width) values, to which all the images will be resized. 134 | random_scale: whether to randomly scale the images prior to random crop. 
135 | random_mirror: whether to randomly mirror the images prior to random crop. 136 | coord: TensorFlow queue coordinator. 137 | ''' 138 | self.data_dir = data_dir 139 | self.data_list = data_list 140 | self.input_size = input_size 141 | self.coord = coord 142 | 143 | self.image_list = read_labeled_image_list(self.data_dir, self.data_list) 144 | self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string) 145 | self.queue = tf.train.slice_input_producer([self.images], 146 | shuffle=input_size is not None) # not shuffling if it is val 147 | self.image = read_images_from_disk(self.queue, self.input_size, random_scale, random_mirror) 148 | 149 | def dequeue(self, num_elements): 150 | '''Pack images and labels into a batch. 151 | 152 | Args: 153 | num_elements: the batch size. 154 | 155 | Returns: 156 | Two tensors of size (batch_size, h, w, {3, 1}) for images and masks.''' 157 | image_batch, label_batch = tf.train.batch([self.image, self.label], 158 | num_elements) 159 | return image_batch, label_batch 160 | -------------------------------------------------------------------------------- /utils/lip_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import random 6 | 7 | IGNORE_LABEL = 255 8 | NUM_POSE = 16 9 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 10 | 11 | def image_scaling(img, label, heatmap): 12 | """ 13 | Randomly scales the images between 0.5 to 1.5 times the original size. 14 | 15 | Args: 16 | img: Training image to scale. 17 | label: Segmentation mask to scale. 18 | """ 19 | 20 | scale = tf.random_uniform([1], minval=0.75, maxval=1.25, dtype=tf.float32, seed=None) 21 | h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale)) 22 | w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale)) 23 | 24 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1]) 25 | img = tf.image.resize_images(img, new_shape) 26 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 27 | label = tf.squeeze(label, squeeze_dims=[0]) 28 | heatmap = tf.image.resize_nearest_neighbor(heatmap, new_shape) 29 | return img, label, heatmap 30 | 31 | def image_mirroring(img, label, label_rev, heatmap, heatmap_rev): 32 | """ 33 | Randomly mirrors the images. 34 | 35 | Args: 36 | img: Training image to mirror. 37 | label: Segmentation mask to mirror. 
38 | """ 39 | 40 | distort_left_right_random = tf.random_uniform([1], 0, 1.0, dtype=tf.float32)[0] 41 | mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5) 42 | mirror = tf.boolean_mask([0, 1, 2], mirror) 43 | img = tf.reverse(img, mirror) 44 | 45 | flag = tf.less(distort_left_right_random, 0.5) 46 | mask = tf.stack([tf.logical_not(flag), flag]) 47 | 48 | label_and_rev = tf.stack([label, label_rev]) 49 | label_ = tf.boolean_mask(label_and_rev, mask) 50 | label_ = tf.reshape(label_, tf.shape(label)) 51 | 52 | heatmap_and_rev = tf.stack([heatmap, heatmap_rev]) 53 | heatmap_ = tf.boolean_mask(heatmap_and_rev, mask) 54 | heatmap_ = tf.reshape(heatmap_, tf.shape(heatmap)) 55 | 56 | return img, label_, heatmap_ 57 | 58 | def random_resize_img_labels(image, label, heatmap, resized_h, resized_w): 59 | 60 | scale = tf.random_uniform([1], minval=0.75, maxval=1.25, dtype=tf.float32, seed=None) 61 | h_new = tf.to_int32(tf.multiply(tf.to_float(resized_h), scale)) 62 | w_new = tf.to_int32(tf.multiply(tf.to_float(resized_w), scale)) 63 | 64 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1]) 65 | img = tf.image.resize_images(image, new_shape) 66 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 67 | label = tf.squeeze(label, squeeze_dims=[0]) 68 | heatmap = tf.image.resize_nearest_neighbor(tf.expand_dims(heatmap, 0), new_shape) 69 | heatmap = tf.squeeze(heatmap, squeeze_dims=[0]) 70 | return img, label, heatmap 71 | 72 | def resize_img_labels(image, label, heatmap, resized_h, resized_w): 73 | 74 | new_shape = tf.stack([tf.to_int32(resized_h), tf.to_int32(resized_w)]) 75 | img = tf.image.resize_images(image, new_shape) 76 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 77 | label = tf.squeeze(label, squeeze_dims=[0]) 78 | new_shape = tf.stack([tf.to_int32(resized_h / 8.0), tf.to_int32(resized_w / 8.0)]) 79 | heatmap = tf.image.resize_nearest_neighbor(tf.expand_dims(heatmap, 0), new_shape) 80 | heatmap = tf.squeeze(heatmap, squeeze_dims=[0]) 81 | return img, label, heatmap 82 | 83 | def random_crop_and_pad_image_and_labels(image, label, heatmap, crop_h, crop_w, ignore_label=255): 84 | """ 85 | Randomly crop and pads the input images. 86 | 87 | Args: 88 | image: Training image to crop/ pad. 89 | label: Segmentation mask to crop/ pad. 90 | crop_h: Height of cropped segment. 91 | crop_w: Width of cropped segment. 92 | ignore_label: Label to ignore during the training. 93 | """ 94 | 95 | label = tf.cast(label, dtype=tf.float32) 96 | label = label - ignore_label # Needs to be subtracted and later added due to 0 padding. 97 | heatmap = tf.cast(heatmap, dtype=tf.float32) 98 | combined = tf.concat([image, label, heatmap], 2) 99 | image_shape = tf.shape(image) 100 | combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0, tf.maximum(crop_h, image_shape[0]), tf.maximum(crop_w, image_shape[1])) 101 | 102 | last_image_dim = tf.shape(image)[-1] 103 | last_label_dim = tf.shape(label)[-1] 104 | combined_crop = tf.random_crop(combined_pad, [crop_h,crop_w,4+NUM_POSE]) 105 | img_crop = combined_crop[:, :, :last_image_dim] 106 | label_crop = combined_crop[:, :, last_image_dim:last_image_dim+last_label_dim] 107 | heatmap_crop = combined_crop[:, :, last_image_dim+last_label_dim:] 108 | label_crop = label_crop + ignore_label 109 | label_crop = tf.cast(label_crop, dtype=tf.uint8) 110 | 111 | # Set static shape so that tensorflow knows shape at compile time. 
112 | img_crop.set_shape((crop_h, crop_w, 3)) 113 | label_crop.set_shape((crop_h,crop_w, 1)) 114 | heatmap_crop.set_shape((crop_h, crop_w, NUM_POSE)) 115 | new_shape = tf.stack([tf.to_int32(crop_h / 8.0), tf.to_int32(crop_w / 8.0)]) 116 | heatmap = tf.image.resize_nearest_neighbor(tf.expand_dims(heatmap_crop, 0), new_shape) 117 | heatmap = tf.squeeze(heatmap, squeeze_dims=[0]) 118 | return img_crop, label_crop, heatmap 119 | 120 | 121 | def read_labeled_image_list(data_dir, data_list): 122 | """Reads txt file containing paths to images and ground truth masks. 123 | 124 | Args: 125 | data_dir: path to the directory with images and masks. 126 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 127 | 128 | Returns: 129 | Two lists with all file names for images and masks, respectively. 130 | """ 131 | f = open(data_list, 'r') 132 | images = [] 133 | masks = [] 134 | masks_rev = [] 135 | for line in f: 136 | try: 137 | image, mask, mask_rev = line.strip("\n").split(' ') 138 | except ValueError: # Adhoc for test. 139 | image = mask = mask_rev = line.strip("\n") 140 | images.append(data_dir + image) 141 | masks.append(data_dir + mask) 142 | masks_rev.append(data_dir + mask_rev) 143 | return images, masks, masks_rev 144 | 145 | def read_pose_list(data_dir, data_id_list): 146 | f = open(data_id_list, 'r') 147 | poses = [] 148 | for line in f: 149 | pose = line.strip("\n") 150 | poses.append(data_dir + '/heatmap/' + pose) 151 | return poses 152 | 153 | 154 | def read_images_from_disk(input_queue, input_size, random_scale, random_mirror=False): # optional pre-processing arguments 155 | """Read one image and its corresponding mask with optional pre-processing. 156 | 157 | Args: 158 | input_queue: tf queue with paths to the image and its mask. 159 | input_size: a tuple with (height, width) values. 160 | If not given, return images of original size. 161 | random_scale: whether to randomly scale the images prior 162 | to random crop. 163 | random_mirror: whether to randomly mirror the images prior 164 | to random crop. 165 | 166 | Returns: 167 | Two tensors: the decoded image and its mask. 168 | """ 169 | 170 | img_contents = tf.read_file(input_queue[0]) 171 | label_contents = tf.read_file(input_queue[1]) 172 | label_contents_rev = tf.read_file(input_queue[2]) 173 | 174 | img = tf.image.decode_jpeg(img_contents, channels=3) 175 | img_r, img_g, img_b = tf.split(value=img, num_or_size_splits=3, axis=2) 176 | img = tf.cast(tf.concat([img_b, img_g, img_r], 2), dtype=tf.float32) 177 | # Extract mean. 
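# The channels were re-ordered to BGR above, so IMG_MEAN is subtracted as a per-channel
# BGR mean, matching the Caffe-converted backbone weights.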
178 | img -= IMG_MEAN 179 | 180 | label = tf.image.decode_png(label_contents, channels=1) 181 | label_rev = tf.image.decode_png(label_contents_rev, channels=1) 182 | 183 | pose_id = input_queue[3] 184 | pose = [] 185 | for i in xrange(NUM_POSE): 186 | pose_contents = tf.read_file(pose_id+'_{}.png'.format(i)) 187 | pose_i = tf.image.decode_png(pose_contents, channels=1) 188 | pose.append(pose_i) 189 | heatmap = tf.concat(pose, axis=2) 190 | 191 | # create reversed heatmap 192 | pose_rev = [None] * 16 193 | pose_rev[0] = pose[5] 194 | pose_rev[1] = pose[4] 195 | pose_rev[2] = pose[3] 196 | pose_rev[3] = pose[2] 197 | pose_rev[4] = pose[1] 198 | pose_rev[5] = pose[0] 199 | pose_rev[10] = pose[15] 200 | pose_rev[11] = pose[14] 201 | pose_rev[12] = pose[13] 202 | pose_rev[13] = pose[12] 203 | pose_rev[14] = pose[11] 204 | pose_rev[15] = pose[10] 205 | pose_rev[6] = pose[6] 206 | pose_rev[7] = pose[7] 207 | pose_rev[8] = pose[8] 208 | pose_rev[9] = pose[9] 209 | heatmap_rev = tf.concat(pose_rev, axis=2) 210 | heatmap_rev = tf.reverse(heatmap_rev, tf.stack([1])) 211 | 212 | if input_size is not None: 213 | h, w = input_size 214 | 215 | # Randomly mirror the images and labels. 216 | if random_mirror: 217 | img, label, heatmap = image_mirroring(img, label, label_rev, heatmap, heatmap_rev) 218 | 219 | # Randomly resize the images and labels. 220 | if random_scale: 221 | img, label, heatmap = random_resize_img_labels(img, label, heatmap, h, w) 222 | # Random scale must be followed by crop to create fixed size 223 | img, label, heatmap = random_crop_and_pad_image_and_labels(img, label, heatmap, h, w, IGNORE_LABEL) 224 | else: 225 | img, label, heatmap = resize_img_labels(img, label, heatmap, h, w) 226 | 227 | return img, label, heatmap 228 | 229 | class LIPReader(object): 230 | '''Generic ImageReader which reads images and corresponding segmentation 231 | masks from the disk, and enqueues them into a TensorFlow queue. 232 | ''' 233 | 234 | def __init__(self, data_dir, data_list, data_id_list, input_size, random_scale, 235 | random_mirror, shuffle, coord): 236 | '''Initialise an ImageReader. 237 | 238 | Args: 239 | data_dir: path to the directory with images and masks. 240 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 241 | data_id_list: path to the file of image id. 242 | input_size: a tuple with (height, width) values, to which all the images will be resized. 243 | random_scale: whether to randomly scale the images prior to random crop. 244 | random_mirror: whether to randomly mirror the images prior to random crop. 245 | coord: TensorFlow queue coordinator. 
246 | ''' 247 | self.data_dir = data_dir 248 | self.data_list = data_list 249 | self.data_id_list = data_id_list 250 | self.input_size = input_size 251 | self.coord = coord 252 | 253 | self.image_list, self.label_list, self.label_rev_list = read_labeled_image_list(self.data_dir, self.data_list) 254 | self.pose_list = read_pose_list(self.data_dir, self.data_id_list) 255 | self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string) 256 | self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string) 257 | self.labels_rev = tf.convert_to_tensor(self.label_rev_list, dtype=tf.string) 258 | self.poses = tf.convert_to_tensor(self.pose_list, dtype=tf.string) 259 | self.queue = tf.train.slice_input_producer([self.images, self.labels, self.labels_rev, self.poses], shuffle=shuffle) 260 | self.image, self.label, self.heatmap = read_images_from_disk(self.queue, self.input_size, random_scale, random_mirror) 261 | 262 | def dequeue(self, num_elements): 263 | '''Pack images and labels into a batch. 264 | 265 | Args: 266 | num_elements: the batch size. 267 | 268 | Returns: 269 | Two tensors of size (batch_size, h, w, {3, 1}) for images and masks.''' 270 | batch_list = [self.image, self.label, self.heatmap] 271 | image_batch, label_batch, heatmap_batch = tf.train.batch([self.image, self.label, self.heatmap], num_elements) 272 | return image_batch, label_batch, heatmap_batch 273 | -------------------------------------------------------------------------------- /utils/model.py: -------------------------------------------------------------------------------- 1 | # Converted to TensorFlow .caffemodel 2 | # with the DeepLab-ResNet configuration. 3 | # The batch normalisation layer is provided by 4 | # the slim library (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim). 5 | 6 | from kaffe.tensorflow import Network 7 | import tensorflow as tf 8 | 9 | class JPPNetModel(Network): 10 | def setup(self, is_training, n_classes): 11 | '''Network definition. 12 | 13 | Args: 14 | is_training: whether to update the running mean and variance of the batch normalisation layer. 15 | If the batch size is small, it is better to keep the running mean and variance of 16 | the-pretrained model frozen. 
17 | ''' 18 | (self.feed('data') 19 | .conv(7, 7, 64, 2, 2, biased=False, relu=False, name='conv1') 20 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn_conv1') 21 | .max_pool(3, 3, 2, 2, name='pool1') 22 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1') 23 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch1')) 24 | 25 | (self.feed('pool1') 26 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a') 27 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2a') 28 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b') 29 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2b') 30 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c') 31 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch2c')) 32 | 33 | (self.feed('bn2a_branch1', 34 | 'bn2a_branch2c') 35 | .add(name='res2a') 36 | .relu(name='res2a_relu') 37 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a') 38 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2a') 39 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b') 40 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2b') 41 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c') 42 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2b_branch2c')) 43 | 44 | (self.feed('res2a_relu', 45 | 'bn2b_branch2c') 46 | .add(name='res2b') 47 | .relu(name='res2b_relu') 48 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a') 49 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2a') 50 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b') 51 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2b') 52 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c') 53 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2c_branch2c')) 54 | 55 | (self.feed('res2b_relu', 56 | 'bn2c_branch2c') 57 | .add(name='res2c') 58 | .relu(name='res2c_relu') 59 | .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1') 60 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch1')) 61 | 62 | (self.feed('res2c_relu') 63 | .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a') 64 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2a') 65 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b') 66 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2b') 67 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c') 68 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch2c')) 69 | 70 | (self.feed('bn3a_branch1', 71 | 'bn3a_branch2c') 72 | .add(name='res3a') 73 | .relu(name='res3a_relu') 74 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2a') 75 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2a') 76 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2b') 77 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2b') 78 | .conv(1, 1, 512, 1, 
1, biased=False, relu=False, name='res3b1_branch2c') 79 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b1_branch2c')) 80 | 81 | (self.feed('res3a_relu', 82 | 'bn3b1_branch2c') 83 | .add(name='res3b1') 84 | .relu(name='res3b1_relu') 85 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2a') 86 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2a') 87 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2b') 88 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2b') 89 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b2_branch2c') 90 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b2_branch2c')) 91 | 92 | (self.feed('res3b1_relu', 93 | 'bn3b2_branch2c') 94 | .add(name='res3b2') 95 | .relu(name='res3b2_relu') 96 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2a') 97 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2a') 98 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2b') 99 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2b') 100 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b3_branch2c') 101 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b3_branch2c')) 102 | 103 | (self.feed('res3b2_relu', 104 | 'bn3b3_branch2c') 105 | .add(name='res3b3') 106 | .relu(name='res3b3_relu') 107 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch1') 108 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch1')) 109 | 110 | (self.feed('res3b3_relu') 111 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4a_branch2a') 112 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2a') 113 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4a_branch2b') 114 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2b') 115 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c') 116 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch2c')) 117 | 118 | (self.feed('bn4a_branch1', 119 | 'bn4a_branch2c') 120 | .add(name='res4a') 121 | .relu(name='res4a_relu') 122 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b1_branch2a') 123 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2a') 124 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b1_branch2b') 125 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2b') 126 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b1_branch2c') 127 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b1_branch2c')) 128 | 129 | (self.feed('res4a_relu', 130 | 'bn4b1_branch2c') 131 | .add(name='res4b1') 132 | .relu(name='res4b1_relu') 133 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b2_branch2a') 134 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2a') 135 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b2_branch2b') 136 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2b') 137 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, 
name='res4b2_branch2c') 138 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b2_branch2c')) 139 | 140 | (self.feed('res4b1_relu', 141 | 'bn4b2_branch2c') 142 | .add(name='res4b2') 143 | .relu(name='res4b2_relu') 144 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b3_branch2a') 145 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2a') 146 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b3_branch2b') 147 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2b') 148 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b3_branch2c') 149 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b3_branch2c')) 150 | 151 | (self.feed('res4b2_relu', 152 | 'bn4b3_branch2c') 153 | .add(name='res4b3') 154 | .relu(name='res4b3_relu') 155 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b4_branch2a') 156 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2a') 157 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b4_branch2b') 158 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2b') 159 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b4_branch2c') 160 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b4_branch2c')) 161 | 162 | (self.feed('res4b3_relu', 163 | 'bn4b4_branch2c') 164 | .add(name='res4b4') 165 | .relu(name='res4b4_relu') 166 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b5_branch2a') 167 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2a') 168 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b5_branch2b') 169 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2b') 170 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b5_branch2c') 171 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b5_branch2c')) 172 | 173 | (self.feed('res4b4_relu', 174 | 'bn4b5_branch2c') 175 | .add(name='res4b5') 176 | .relu(name='res4b5_relu') 177 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b6_branch2a') 178 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2a') 179 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b6_branch2b') 180 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2b') 181 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b6_branch2c') 182 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b6_branch2c')) 183 | 184 | (self.feed('res4b5_relu', 185 | 'bn4b6_branch2c') 186 | .add(name='res4b6') 187 | .relu(name='res4b6_relu') 188 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b7_branch2a') 189 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2a') 190 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b7_branch2b') 191 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2b') 192 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b7_branch2c') 193 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b7_branch2c')) 194 | 195 | (self.feed('res4b6_relu', 196 | 
'bn4b7_branch2c') 197 | .add(name='res4b7') 198 | .relu(name='res4b7_relu') 199 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b8_branch2a') 200 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2a') 201 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b8_branch2b') 202 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2b') 203 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b8_branch2c') 204 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b8_branch2c')) 205 | 206 | (self.feed('res4b7_relu', 207 | 'bn4b8_branch2c') 208 | .add(name='res4b8') 209 | .relu(name='res4b8_relu') 210 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b9_branch2a') 211 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2a') 212 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b9_branch2b') 213 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2b') 214 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b9_branch2c') 215 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b9_branch2c')) 216 | 217 | (self.feed('res4b8_relu', 218 | 'bn4b9_branch2c') 219 | .add(name='res4b9') 220 | .relu(name='res4b9_relu') 221 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b10_branch2a') 222 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2a') 223 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b10_branch2b') 224 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2b') 225 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b10_branch2c') 226 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b10_branch2c')) 227 | 228 | (self.feed('res4b9_relu', 229 | 'bn4b10_branch2c') 230 | .add(name='res4b10') 231 | .relu(name='res4b10_relu') 232 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b11_branch2a') 233 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2a') 234 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b11_branch2b') 235 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2b') 236 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b11_branch2c') 237 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b11_branch2c')) 238 | 239 | (self.feed('res4b10_relu', 240 | 'bn4b11_branch2c') 241 | .add(name='res4b11') 242 | .relu(name='res4b11_relu') 243 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b12_branch2a') 244 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2a') 245 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b12_branch2b') 246 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2b') 247 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b12_branch2c') 248 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b12_branch2c')) 249 | 250 | (self.feed('res4b11_relu', 251 | 'bn4b12_branch2c') 252 | .add(name='res4b12') 253 | .relu(name='res4b12_relu') 254 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, 
name='res4b13_branch2a') 255 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2a') 256 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b13_branch2b') 257 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2b') 258 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b13_branch2c') 259 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b13_branch2c')) 260 | 261 | (self.feed('res4b12_relu', 262 | 'bn4b13_branch2c') 263 | .add(name='res4b13') 264 | .relu(name='res4b13_relu') 265 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b14_branch2a') 266 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2a') 267 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b14_branch2b') 268 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2b') 269 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b14_branch2c') 270 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b14_branch2c')) 271 | 272 | (self.feed('res4b13_relu', 273 | 'bn4b14_branch2c') 274 | .add(name='res4b14') 275 | .relu(name='res4b14_relu') 276 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b15_branch2a') 277 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2a') 278 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b15_branch2b') 279 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2b') 280 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b15_branch2c') 281 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b15_branch2c')) 282 | 283 | (self.feed('res4b14_relu', 284 | 'bn4b15_branch2c') 285 | .add(name='res4b15') 286 | .relu(name='res4b15_relu') 287 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b16_branch2a') 288 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2a') 289 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b16_branch2b') 290 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2b') 291 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b16_branch2c') 292 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b16_branch2c')) 293 | 294 | (self.feed('res4b15_relu', 295 | 'bn4b16_branch2c') 296 | .add(name='res4b16') 297 | .relu(name='res4b16_relu') 298 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b17_branch2a') 299 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2a') 300 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b17_branch2b') 301 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2b') 302 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b17_branch2c') 303 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b17_branch2c')) 304 | 305 | (self.feed('res4b16_relu', 306 | 'bn4b17_branch2c') 307 | .add(name='res4b17') 308 | .relu(name='res4b17_relu') 309 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b18_branch2a') 310 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, 
name='bn4b18_branch2a') 311 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b18_branch2b') 312 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b18_branch2b') 313 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b18_branch2c') 314 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b18_branch2c')) 315 | 316 | (self.feed('res4b17_relu', 317 | 'bn4b18_branch2c') 318 | .add(name='res4b18') 319 | .relu(name='res4b18_relu') 320 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b19_branch2a') 321 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2a') 322 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b19_branch2b') 323 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2b') 324 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b19_branch2c') 325 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b19_branch2c')) 326 | 327 | (self.feed('res4b18_relu', 328 | 'bn4b19_branch2c') 329 | .add(name='res4b19') 330 | .relu(name='res4b19_relu') 331 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b20_branch2a') 332 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2a') 333 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b20_branch2b') 334 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2b') 335 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b20_branch2c') 336 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b20_branch2c')) 337 | 338 | (self.feed('res4b19_relu', 339 | 'bn4b20_branch2c') 340 | .add(name='res4b20') 341 | .relu(name='res4b20_relu') 342 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b21_branch2a') 343 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2a') 344 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b21_branch2b') 345 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2b') 346 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b21_branch2c') 347 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b21_branch2c')) 348 | 349 | (self.feed('res4b20_relu', 350 | 'bn4b21_branch2c') 351 | .add(name='res4b21') 352 | .relu(name='res4b21_relu') 353 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b22_branch2a') 354 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2a') 355 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b22_branch2b') 356 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2b') 357 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b22_branch2c') 358 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b22_branch2c')) 359 | 360 | (self.feed('res4b21_relu', 361 | 'bn4b22_branch2c') 362 | .add(name='res4b22') 363 | .relu(name='res4b22_relu')) 364 | 365 | ######################################parsing networks################################################################ 366 | (self.feed('res4b22_relu') 367 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch1') 368 | 
.batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch1')) 369 | 370 | (self.feed('res4b22_relu') 371 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5a_branch2a') 372 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2a') 373 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5a_branch2b') 374 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2b') 375 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c') 376 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch2c')) 377 | 378 | (self.feed('bn5a_branch1', 379 | 'bn5a_branch2c') 380 | .add(name='res5a') 381 | .relu(name='res5a_relu') 382 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a') 383 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2a') 384 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5b_branch2b') 385 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2b') 386 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c') 387 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5b_branch2c')) 388 | 389 | (self.feed('res5a_relu', 390 | 'bn5b_branch2c') 391 | .add(name='res5b') 392 | .relu(name='res5b_relu') 393 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a') 394 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5c_branch2a') 395 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5c_branch2b') 396 | .batch_normalization(activation_fn=tf.nn.relu, name='bn5c_branch2b', is_training=is_training) 397 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c') 398 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5c_branch2c')) 399 | 400 | (self.feed('res5b_relu', 401 | 'bn5c_branch2c') 402 | .add(name='res5c') 403 | .relu(name='res5c_relu') 404 | .atrous_conv(3, 3, n_classes, 6, padding='SAME', relu=False, name='fc1_human_c0')) 405 | 406 | (self.feed('res5c_relu') 407 | .atrous_conv(3, 3, n_classes, 12, padding='SAME', relu=False, name='fc1_human_c1')) 408 | 409 | (self.feed('res5c_relu') 410 | .atrous_conv(3, 3, n_classes, 18, padding='SAME', relu=False, name='fc1_human_c2')) 411 | 412 | (self.feed('res5c_relu') 413 | .atrous_conv(3, 3, n_classes, 24, padding='SAME', relu=False, name='fc1_human_c3')) 414 | 415 | (self.feed('fc1_human_c0', 416 | 'fc1_human_c1', 417 | 'fc1_human_c2', 418 | 'fc1_human_c3') 419 | .add(name='fc1_human')) 420 | 421 | (self.feed('res5c_relu') 422 | .conv(3, 3, 512, 1, 1, biased=True, relu=True, name='res5d_branch2a_parsing') 423 | .conv(3, 3, 256, 1, 1, biased=True, relu=True, name='res5d_branch2b_parsing')) 424 | 425 | # ###################################End################################################################ 426 | 427 | -------------------------------------------------------------------------------- /utils/ops.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from tensorflow.python.framework import ops 6 | 7 | def conv2d(input_, output, kernel, stride, relu, bn, name, stddev=0.01): 8 | with tf.variable_scope(name) as scope: 9 | # Convolution for a given input and kernel 10 | shape = 
[kernel, kernel, input_.get_shape()[-1], output] 11 | w = tf.get_variable('w', shape, initializer=tf.truncated_normal_initializer(stddev=stddev)) 12 | conv = tf.nn.conv2d(input_, w, strides=[1, stride, stride, 1], padding='SAME') 13 | # Add the biases 14 | b = tf.get_variable('b', [output], initializer=tf.constant_initializer(0.0)) 15 | conv = tf.nn.bias_add(conv, b) 16 | if bn: 17 | conv = tf.layers.batch_normalization(conv) 18 | # ReLU non-linearity 19 | if relu: 20 | conv = tf.nn.relu(conv, name=scope.name) 21 | return conv 22 | 23 | def max_pool(input_, kernel, stride, name): 24 | return tf.nn.max_pool(input_, ksize=[1, kernel, kernel, 1], strides=[1, stride, stride, 1], padding='SAME', name=name) 25 | 26 | def linear(input_, output, name, stddev=0.02, bias_start=0.0): 27 | shape = input_.get_shape().as_list() 28 | with tf.variable_scope(name) as scope: 29 | matrix = tf.get_variable("Matrix", [shape[1], output], tf.float32, 30 | tf.random_normal_initializer(stddev=stddev)) 31 | bias = tf.get_variable("bias", [output], initializer=tf.constant_initializer(bias_start)) 32 | return tf.matmul(input_, matrix) + bias 33 | 34 | def atrous_conv2d(input_, output, kernel, rate, relu, name, stddev=0.01): 35 | with tf.variable_scope(name) as scope: 36 | # Dilation convolution for a given input and kernel 37 | shape = [kernel, kernel, input_.get_shape()[-1], output] 38 | w = tf.get_variable('w', shape, initializer=tf.truncated_normal_initializer(stddev=stddev)) 39 | conv = tf.nn.atrous_conv2d(input_, w, rate, padding='SAME') 40 | # Add the biases 41 | b = tf.get_variable('b', [output], initializer=tf.constant_initializer(0.0)) 42 | conv = tf.nn.bias_add(conv, b) 43 | # ReLU non-linearity 44 | if relu: 45 | conv = tf.nn.relu(conv, name=scope.name) 46 | return conv 47 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import tensorflow as tf 4 | import os 5 | import scipy.misc 6 | from scipy.stats import multivariate_normal 7 | import matplotlib.pyplot as plt 8 | 9 | n_classes = 20 10 | # colour map 11 | label_colours = [(0,0,0) 12 | # 0=Background 13 | ,(128,0,0),(255,0,0),(0,85,0),(170,0,51),(255,85,0) 14 | # 1=Hat, 2=Hair, 3=Glove, 4=Sunglasses, 5=UpperClothes 15 | ,(0,0,85),(0,119,221),(85,85,0),(0,85,85),(85,51,0) 16 | # 6=Dress, 7=Coat, 8=Socks, 9=Pants, 10=Jumpsuits 17 | ,(52,86,128),(0,128,0),(0,0,255),(51,170,221),(0,255,255) 18 | # 11=Scarf, 12=Skirt, 13=Face, 14=LeftArm, 15=RightArm 19 | ,(85,255,170),(170,255,85),(255,255,0),(255,170,0)] 20 | # 16=LeftLeg, 17=RightLeg, 18=LeftShoe, 19=RightShoe 21 | # image mean 22 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 23 | 24 | def decode_labels(mask, num_images=1, num_classes=20): 25 | """Decode batch of segmentation masks. 26 | 27 | Args: 28 | mask: result of inference after taking argmax. 29 | num_images: number of images to decode from the batch. 30 | 31 | Returns: 32 | A batch with num_images RGB images of the same size as the input. 33 | """ 34 | n, h, w, c = mask.shape 35 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' 
% (n, num_images) 36 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) 37 | for i in range(num_images): 38 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i]))) 39 | pixels = img.load() 40 | for j_, j in enumerate(mask[i, :, :, 0]): 41 | for k_, k in enumerate(j): 42 | if k < n_classes: 43 | pixels[k_,j_] = label_colours[k] 44 | outputs[i] = np.array(img) 45 | return outputs 46 | 47 | def prepare_label(input_batch, new_size, one_hot=True): 48 | """Resize masks and perform one-hot encoding. 49 | 50 | Args: 51 | input_batch: input tensor of shape [batch_size H W 1]. 52 | new_size: a tensor with new height and width. 53 | 54 | Returns: 55 | Outputs a tensor of shape [batch_size h w 21] 56 | with last dimension comprised of 0's and 1's only. 57 | """ 58 | with tf.name_scope('label_encode'): 59 | input_batch = tf.image.resize_nearest_neighbor(input_batch, new_size) # as labels are integer numbers, need to use NN interp. 60 | input_batch = tf.squeeze(input_batch, squeeze_dims=[3]) # reducing the channel dimension. 61 | if one_hot: 62 | input_batch = tf.one_hot(input_batch, depth=n_classes) 63 | return input_batch 64 | 65 | def inv_preprocess(imgs, num_images): 66 | """Inverse preprocessing of the batch of images. 67 | Add the mean vector and convert from BGR to RGB. 68 | 69 | Args: 70 | imgs: batch of input images. 71 | num_images: number of images to apply the inverse transformations on. 72 | 73 | Returns: 74 | The batch of the size num_images with the same spatial dimensions as the input. 75 | """ 76 | n, h, w, c = imgs.shape 77 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images) 78 | outputs = np.zeros((num_images, h, w, c), dtype=np.uint8) 79 | for i in range(num_images): 80 | outputs[i] = (imgs[i] + IMG_MEAN)[:, :, ::-1].astype(np.uint8) 81 | return outputs 82 | 83 | 84 | def save(saver, sess, logdir, step): 85 | '''Save weights. 86 | Args: 87 | saver: TensorFlow Saver object. 88 | sess: TensorFlow session. 89 | logdir: path to the snapshots directory. 90 | step: current training step. 91 | ''' 92 | if not os.path.exists(logdir): 93 | os.makedirs(logdir) 94 | model_name = 'model.ckpt' 95 | checkpoint_path = os.path.join(logdir, model_name) 96 | 97 | if not os.path.exists(logdir): 98 | os.makedirs(logdir) 99 | saver.save(sess, checkpoint_path, global_step=step) 100 | print('The checkpoint has been created.') 101 | 102 | def load(saver, sess, ckpt_path): 103 | '''Load trained weights. 104 | 105 | Args: 106 | saver: TensorFlow saver object. 107 | sess: TensorFlow session. 108 | ckpt_path: path to checkpoint file with parameters. 109 | ''' 110 | ckpt = tf.train.get_checkpoint_state(ckpt_path) 111 | if ckpt and ckpt.model_checkpoint_path: 112 | ckpt_name = os.path.basename(ckpt.model_checkpoint_path) 113 | saver.restore(sess, os.path.join(ckpt_path, ckpt_name)) 114 | print("Restored model parameters from {}".format(ckpt_name)) 115 | return True 116 | else: 117 | return False 118 | --------------------------------------------------------------------------------
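--------------------------------------------------------------------------------

Editor's note: the input-pipeline utilities above are easiest to follow end to end. Below is a minimal, hypothetical usage sketch (not part of the repository): it wires the LIPReader class shown above (assumed to live in utils/lip_reader.py) into a TF 1.x queue-runner pipeline and colour-codes one ground-truth parsing mask with decode_labels from utils/utils.py. The dataset paths, input size, and batch size are assumptions and must be adapted to the local LIP dataset layout.

import tensorflow as tf

from utils.lip_reader import LIPReader   # assumed module path for the LIPReader class above
from utils.utils import decode_labels

INPUT_SIZE = (384, 384)   # assumed training resolution
BATCH_SIZE = 2            # assumed batch size

# Coordinator that stops the queue-runner threads started below.
coord = tf.train.Coordinator()

with tf.name_scope('create_inputs'):
    reader = LIPReader(
        data_dir='./datasets/lip',                        # assumed dataset root
        data_list='./datasets/lip/list/train_rev.txt',    # lines: image mask mask_rev
        data_id_list='./datasets/lip/list/train_id.txt',  # image ids used to locate heatmaps
        input_size=INPUT_SIZE,
        random_scale=True,
        random_mirror=True,
        shuffle=True,
        coord=coord)
    image_batch, label_batch, heatmap_batch = reader.dequeue(BATCH_SIZE)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # One batch of mean-subtracted BGR images, parsing masks and pose heatmaps.
    imgs, labels, heatmaps = sess.run([image_batch, label_batch, heatmap_batch])
    print(imgs.shape, labels.shape, heatmaps.shape)

    # Colour-code the first parsing mask for visual inspection.
    coloured = decode_labels(labels, num_images=1)

    coord.request_stop()
    coord.join(threads)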
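In the same spirit, the four fc1_human_c* branches in utils/model.py follow a multi-rate atrous pattern: parallel dilated convolutions over the same feature map whose per-pixel logits are summed. The short sketch below rebuilds that pattern with the atrous_conv2d helper from utils/ops.py; the input feature shape and the scope names are assumptions for illustration only.

import tensorflow as tf

from utils.ops import atrous_conv2d

n_classes = 20
# Assumed backbone feature map: batch of 48x48 maps with 256 channels.
features = tf.placeholder(tf.float32, [None, 48, 48, 256], name='features')

# One 3x3 dilated convolution per sampling rate, each in its own variable scope.
branches = [atrous_conv2d(features, n_classes, 3, rate=r, relu=False,
                          name='aspp_rate{}'.format(r))
            for r in (6, 12, 18, 24)]

# Per-pixel class scores are the element-wise sum of the four branches,
# mirroring the fc1_human add in utils/model.py.
logits = tf.add_n(branches, name='aspp_sum')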