├── .gitignore ├── LICENSE ├── LIP_model.py ├── README.md ├── datasets ├── examples │ ├── images │ │ ├── 114317_456748.jpg │ │ ├── 208597_461278.jpg │ │ ├── 313434_204398.jpg │ │ ├── 342469_423620.jpg │ │ ├── 447689_524975.jpg │ │ └── 76680_475011.jpg │ └── list │ │ └── val.txt └── lip │ ├── create_heatmaps.py │ ├── lip_train_set.csv │ └── list │ ├── train_id.txt │ ├── train_rev.txt │ └── val_id.txt ├── evaluate_parsing_JPPNet-s2.py ├── evaluate_pose_JPPNet-s2.py ├── get_maximum_square_from_segmented_image.py ├── kaffe ├── __init__.py ├── caffe │ ├── __init__.py │ ├── caffe_pb2.py │ └── resolver.py ├── errors.py ├── graph.py ├── layers.py ├── shapes.py ├── tensorflow │ ├── __init__.py │ ├── network.py │ └── transformer.py └── transformers.py ├── train_JPPNet-s2.py └── utils ├── __init__.py ├── image_reader.py ├── lip_reader.py ├── model.py ├── ops.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoint/ 2 | output/ 3 | logs/ 4 | model/ 5 | *.pyc 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Vladimir Nekrasov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LIP_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from utils.ops import * 3 | 4 | 5 | #------------------------network setting--------------------- 6 | ################################################# 7 | 8 | ## refine net version 4. 
07.17 9 | 10 | def pose_net(image, name): 11 | with tf.variable_scope(name) as scope: 12 | is_BN = False 13 | pose_conv1 = conv2d(image, 512, 3, 1, relu=True, bn=is_BN, name='pose_conv1') 14 | pose_conv2 = conv2d(pose_conv1, 512, 3, 1, relu=True, bn=is_BN, name='pose_conv2') 15 | pose_conv3 = conv2d(pose_conv2, 256, 3, 1, relu=True, bn=is_BN, name='pose_conv3') 16 | pose_conv4 = conv2d(pose_conv3, 256, 3, 1, relu=True, bn=is_BN, name='pose_conv4') 17 | pose_conv5 = conv2d(pose_conv4, 256, 3, 1, relu=True, bn=is_BN, name='pose_conv5') 18 | pose_conv6 = conv2d(pose_conv5, 256, 3, 1, relu=True, bn=is_BN, name='pose_conv6') 19 | 20 | pose_conv7 = conv2d(pose_conv6, 512, 1, 1, relu=True, bn=is_BN, name='pose_conv7') 21 | pose_conv8 = conv2d(pose_conv7, 16, 1, 1, relu=False, bn=is_BN, name='pose_conv8') 22 | 23 | return pose_conv8, pose_conv6 24 | 25 | 26 | def pose_refine(pose, parsing, pose_fea, name): 27 | with tf.variable_scope(name) as scope: 28 | is_BN = False 29 | # 1*1 convolution remaps the heatmaps to match the number of channels of the intermediate features. 30 | pose = conv2d(pose, 128, 1, 1, relu=True, bn=is_BN, name='pose_remap') 31 | parsing = conv2d(parsing, 128, 1, 1, relu=True, bn=is_BN, name='parsing_remap') 32 | # concat 33 | pos_par = tf.concat([pose, parsing, pose_fea], 3) 34 | conv1 = conv2d(pos_par, 512, 3, 1, relu=True, bn=is_BN, name='conv1') 35 | conv2 = conv2d(conv1, 256, 5, 1, relu=True, bn=is_BN, name='conv2') 36 | conv3 = conv2d(conv2, 256, 7, 1, relu=True, bn=is_BN, name='conv3') 37 | conv4 = conv2d(conv3, 256, 9, 1, relu=True, bn=is_BN, name='conv4') 38 | 39 | conv5 = conv2d(conv4, 256, 1, 1, relu=True, bn=is_BN, name='conv5') 40 | conv6 = conv2d(conv5, 16, 1, 1, relu=False, bn=is_BN, name='conv6') 41 | 42 | return conv6, conv4 43 | 44 | 45 | def parsing_refine(parsing, pose, parsing_fea, name): 46 | with tf.variable_scope(name) as scope: 47 | is_BN = False 48 | pose = conv2d(pose, 128, 1, 1, relu=True, bn=is_BN, name='pose_remap') 49 | parsing = conv2d(parsing, 128, 1, 1, relu=True, bn=is_BN, name='parsing_remap') 50 | 51 | par_pos = tf.concat([parsing, pose, parsing_fea], 3) 52 | parsing_conv1 = conv2d(par_pos, 512, 3, 1, relu=True, bn=is_BN, name='parsing_conv1') 53 | parsing_conv2 = conv2d(parsing_conv1, 256, 5, 1, relu=True, bn=is_BN, name='parsing_conv2') 54 | parsing_conv3 = conv2d(parsing_conv2, 256, 7, 1, relu=True, bn=is_BN, name='parsing_conv3') 55 | parsing_conv4 = conv2d(parsing_conv3, 256, 9, 1, relu=True, bn=is_BN, name='parsing_conv4') 56 | 57 | parsing_conv5 = conv2d(parsing_conv4, 256, 1, 1, relu=True, bn=is_BN, name='parsing_conv5') 58 | parsing_human1 = atrous_conv2d(parsing_conv5, 20, 3, rate=6, relu=False, name='parsing_human1') 59 | parsing_human2 = atrous_conv2d(parsing_conv5, 20, 3, rate=12, relu=False, name='parsing_human2') 60 | parsing_human3 = atrous_conv2d(parsing_conv5, 20, 3, rate=18, relu=False, name='parsing_human3') 61 | parsing_human4 = atrous_conv2d(parsing_conv5, 20, 3, rate=24, relu=False, name='parsing_human4') 62 | parsing_human = tf.add_n([parsing_human1, parsing_human2, parsing_human3, parsing_human4], name='parsing_human') 63 | 64 | return parsing_human, parsing_conv4 65 | ################################################# 66 | 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Joint Body Parsing & Pose Estimation Network (JPPNet) 2 | Xiaodan Liang, Ke Gong, Xiaohui Shen, and Liang Lin, 
"Look into Person: Joint Body Parsing & Pose Estimation Network and A New Benchmark", T-PAMI 2018. 3 | 4 | ### Introduction 5 | 6 | JPPNet is a state-of-art deep learning methord for human parsing and pose estimation built on top of [Tensorflow](http://www.tensorflow.org). 7 | 8 | This novel joint human parsing and pose estimation network incorporates the multiscale feature connections and iterative location refinement in an end-to-end framework to investigate efficient context modeling and then enable parsing and pose tasks that are mutually beneficial to each other. This unified framework achieves state-of-the-art performance for both human parsing and pose estimation tasks. 9 | 10 | 11 | This distribution provides a publicly available implementation for the key model ingredients reported in our latest [paper](https://arxiv.org/pdf/1804.01984.pdf) which is accepted by T-PAMI 2018. 12 | 13 | We simplify the network to solve human parsing by exploring a novel self-supervised structure-sensitive learning approach, which imposes human pose structures into the parsing results without resorting to extra supervision. There is also a public implementation of this self-supervised structure-sensitive JPPNet ([SS-JPPNet](https://github.com/Engineering-Course/LIP_SSL)). 14 | 15 | 16 | ### Look into People (LIP) Dataset 17 | 18 | The SSL is trained and evaluated on our [LIP dataset](https://lip.sysuhcp.com/) for human parsing. Please check it for more model details. The dataset is also available at [google drive](https://drive.google.com/drive/folders/0BzvH3bSnp3E9QjVYZlhWSjltSWM?resourcekey=0-nkS8bDVjPs3bEw3UZW-omA&usp=sharing) and [baidu drive](http://pan.baidu.com/s/1nvqmZBN). 19 | 20 | 21 | ### Pre-trained models 22 | 23 | We have released our trained models of JPPNet on LIP dataset at [google drive](https://drive.google.com/open?id=1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg) and [baidu drive](https://pan.baidu.com/s/1hQvg1TMIt0JA0yMfjyzQgQ). 24 | 25 | 26 | 27 | 28 | ### Inference 29 | 1. Download the pre-trained model and store in $HOME/checkpoint. 30 | 2. Prepare the images and store in $HOME/datasets. 31 | 3. Run evaluate_pose_JPPNet-s2.py for pose estimation and evaluate_parsing_JPPNet-s2.py for human parsing. 32 | 4. The results are saved in $HOME/output 33 | 34 | ### Training 35 | 1. Download the pre-trained model and store in $HOME/checkpoint. 36 | 2. Download LIP dataset or prepare your own data and store in $HOME/datasets. 37 | 3. For LIP dataset, we have provided images, parsing labels, lists and the left-right flipping labels (labels_rev) for data augmentation. You need to generate the heatmaps of pose labels. We have provided a script for reference. 38 | 4. Run train_JPPNet-s2.py to train the JPPNet with two refinement stages. 39 | 5. Use evaluate_pose_JPPNet-s2.py and evaluate_parsing_JPPNet-s2.py to generate the results or evaluate the trained models. 40 | 6. Note that the LIPReader class is only suit for labels in LIP for the left-right flipping augmentation. If you want to train on other datasets with different labels, you may have to re-write an image reader class. 41 | 42 | ## Citation 43 | If you use this code for your research, please cite our papers. 
44 | ``` 45 | @article{liang2018look, 46 | title={Look into Person: Joint Body Parsing \& Pose Estimation Network and a New Benchmark}, 47 | author={Liang, Xiaodan and Gong, Ke and Shen, Xiaohui and Lin, Liang}, 48 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 49 | year={2018}, 50 | publisher={IEEE} 51 | } 52 | 53 | @InProceedings{Gong_2017_CVPR, 54 | author = {Gong, Ke and Liang, Xiaodan and Zhang, Dongyu and Shen, Xiaohui and Lin, Liang}, 55 | title = {Look Into Person: Self-Supervised Structure-Sensitive Learning and a New Benchmark for Human Parsing}, 56 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 57 | month = {July}, 58 | year = {2017} 59 | } 60 | ``` 61 | 62 | 63 | ## Related work 64 | + Self-supervised Structure-sensitive Learning [SSL](https://github.com/Engineering-Course/LIP_SSL), CVPR2017 65 | + Instance-level Human Parsing via Part Grouping Network [PGN](https://github.com/Engineering-Course/CIHP_PGN), ECCV2018 66 | + Graphonomy: Universal Human Parsing via Graph Transfer Learning [Graphonomy](https://github.com/Gaoyiminggithub/Graphonomy), CVPR2019 67 | -------------------------------------------------------------------------------- /datasets/examples/images/114317_456748.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/114317_456748.jpg -------------------------------------------------------------------------------- /datasets/examples/images/208597_461278.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/208597_461278.jpg -------------------------------------------------------------------------------- /datasets/examples/images/313434_204398.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/313434_204398.jpg -------------------------------------------------------------------------------- /datasets/examples/images/342469_423620.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/342469_423620.jpg -------------------------------------------------------------------------------- /datasets/examples/images/447689_524975.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/447689_524975.jpg -------------------------------------------------------------------------------- /datasets/examples/images/76680_475011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_JPPNet/f274c9b65a4159268a4d1a1a2ab1d712d7ad3b32/datasets/examples/images/76680_475011.jpg -------------------------------------------------------------------------------- /datasets/examples/list/val.txt: -------------------------------------------------------------------------------- 1 | /images/114317_456748.jpg 2 | /images/342469_423620.jpg 3 | 
/images/76680_475011.jpg 4 | /images/447689_524975.jpg 5 | /images/208597_461278.jpg 6 | /images/313434_204398.jpg 7 | -------------------------------------------------------------------------------- /datasets/lip/create_heatmaps.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import math 3 | import random 4 | import scipy.misc 5 | import numpy as np 6 | from scipy.stats import multivariate_normal 7 | import scipy.io as sio 8 | import csv 9 | 10 | csv_file = 'lip_train_set.csv' 11 | 12 | with open(csv_file, "r") as input_file: 13 | 14 | for row in csv.reader(input_file): 15 | 16 | img_id = row.pop(0)[:-4] 17 | print img_id 18 | 19 | image_path = './images/{}.jpg'.format(img_id) 20 | img = scipy.misc.imread(image_path).astype(np.float) 21 | rows = img.shape[0] 22 | cols = img.shape[1] 23 | heatmap_ = np.zeros((rows, cols, 16), dtype=np.float64) 24 | 25 | for idx, point in enumerate(row): 26 | if 'nan' in point: 27 | point = 0 28 | if idx % 3 == 0: 29 | c_ = int(point) 30 | c_ = min(c_, cols-1) 31 | c_ = max(c_, 0) 32 | elif idx % 3 == 1 : 33 | r_ = int(point) 34 | r_ = min(r_, rows-1) 35 | r_ = max(r_, 0) 36 | if c_ + r_ > 0: 37 | var = multivariate_normal(mean=[r_, c_], cov=64) 38 | l1 = max(r_-25, 0) 39 | r1 = min(r_+25, rows-1) 40 | l2 = max(c_-25, 0) 41 | r2 = min(c_+25, cols-1) 42 | for i in xrange(l1, r1): 43 | for j in xrange(l2, r2): 44 | heatmap_[i, j, int(idx / 3)] = var.pdf([i, j]) * 400 45 | save_path = './heatmap/{}_{}.png'.format(img_id, int(idx/3)) 46 | scipy.misc.imsave(save_path, heatmap_[:,:,int(idx/3)]) 47 | heatsum_ = np.sum(heatmap_, axis=2) 48 | -------------------------------------------------------------------------------- /evaluate_parsing_JPPNet-s2.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | from datetime import datetime 4 | import os 5 | import sys 6 | import time 7 | import scipy.misc 8 | import cv2 9 | from PIL import Image 10 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 11 | 12 | import tensorflow as tf 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | from utils import * 16 | from LIP_model import * 17 | 18 | N_CLASSES = 20 19 | INPUT_SIZE = (384, 384) 20 | DATA_DIRECTORY = './datasets/examples' 21 | DATA_LIST_PATH = './datasets/examples/list/val.txt' 22 | NUM_STEPS = 6 # Number of images in the validation set. 23 | RESTORE_FROM = './checkpoint/JPPNet-s2' 24 | OUTPUT_DIR = './output/parsing/val' 25 | if not os.path.exists(OUTPUT_DIR): 26 | os.makedirs(OUTPUT_DIR) 27 | 28 | def main(): 29 | """Create the model and start the evaluation process.""" 30 | 31 | # Create queue coordinator. 32 | coord = tf.train.Coordinator() 33 | h, w = INPUT_SIZE 34 | # Load reader. 35 | with tf.name_scope("create_inputs"): 36 | reader = ImageReader(DATA_DIRECTORY, DATA_LIST_PATH, None, False, False, coord) 37 | image = reader.image 38 | image_rev = tf.reverse(image, tf.stack([1])) 39 | image_list = reader.image_list 40 | 41 | image_batch_origin = tf.stack([image, image_rev]) 42 | image_batch = tf.image.resize_images(image_batch_origin, [int(h), int(w)]) 43 | image_batch075 = tf.image.resize_images(image_batch_origin, [int(h * 0.75), int(w * 0.75)]) 44 | image_batch125 = tf.image.resize_images(image_batch_origin, [int(h * 1.25), int(w * 1.25)]) 45 | 46 | # Create network. 
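# The same JPPNet weights are applied at three input scales (1.0x, 0.75x, 1.25x):
# reuse=True shares one set of variables across the scale branches, and the
# per-scale predictions are resized and averaged further below.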
47 | with tf.variable_scope('', reuse=False): 48 | net_100 = JPPNetModel({'data': image_batch}, is_training=False, n_classes=N_CLASSES) 49 | with tf.variable_scope('', reuse=True): 50 | net_075 = JPPNetModel({'data': image_batch075}, is_training=False, n_classes=N_CLASSES) 51 | with tf.variable_scope('', reuse=True): 52 | net_125 = JPPNetModel({'data': image_batch125}, is_training=False, n_classes=N_CLASSES) 53 | 54 | 55 | # parsing net 56 | parsing_fea1_100 = net_100.layers['res5d_branch2b_parsing'] 57 | parsing_fea1_075 = net_075.layers['res5d_branch2b_parsing'] 58 | parsing_fea1_125 = net_125.layers['res5d_branch2b_parsing'] 59 | 60 | parsing_out1_100 = net_100.layers['fc1_human'] 61 | parsing_out1_075 = net_075.layers['fc1_human'] 62 | parsing_out1_125 = net_125.layers['fc1_human'] 63 | 64 | # pose net 65 | resnet_fea_100 = net_100.layers['res4b22_relu'] 66 | resnet_fea_075 = net_075.layers['res4b22_relu'] 67 | resnet_fea_125 = net_125.layers['res4b22_relu'] 68 | 69 | with tf.variable_scope('', reuse=False): 70 | pose_out1_100, pose_fea1_100 = pose_net(resnet_fea_100, 'fc1_pose') 71 | pose_out2_100, pose_fea2_100 = pose_refine(pose_out1_100, parsing_out1_100, pose_fea1_100, name='fc2_pose') 72 | parsing_out2_100, parsing_fea2_100 = parsing_refine(parsing_out1_100, pose_out1_100, parsing_fea1_100, name='fc2_parsing') 73 | parsing_out3_100, parsing_fea3_100 = parsing_refine(parsing_out2_100, pose_out2_100, parsing_fea2_100, name='fc3_parsing') 74 | 75 | with tf.variable_scope('', reuse=True): 76 | pose_out1_075, pose_fea1_075 = pose_net(resnet_fea_075, 'fc1_pose') 77 | pose_out2_075, pose_fea2_075 = pose_refine(pose_out1_075, parsing_out1_075, pose_fea1_075, name='fc2_pose') 78 | parsing_out2_075, parsing_fea2_075 = parsing_refine(parsing_out1_075, pose_out1_075, parsing_fea1_075, name='fc2_parsing') 79 | parsing_out3_075, parsing_fea3_075 = parsing_refine(parsing_out2_075, pose_out2_075, parsing_fea2_075, name='fc3_parsing') 80 | 81 | with tf.variable_scope('', reuse=True): 82 | pose_out1_125, pose_fea1_125 = pose_net(resnet_fea_125, 'fc1_pose') 83 | pose_out2_125, pose_fea2_125 = pose_refine(pose_out1_125, parsing_out1_125, pose_fea1_125, name='fc2_pose') 84 | parsing_out2_125, parsing_fea2_125 = parsing_refine(parsing_out1_125, pose_out1_125, parsing_fea1_125, name='fc2_parsing') 85 | parsing_out3_125, parsing_fea3_125 = parsing_refine(parsing_out2_125, pose_out2_125, parsing_fea2_125, name='fc3_parsing') 86 | 87 | 88 | parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, tf.shape(image_batch_origin)[1:3,]), 89 | tf.image.resize_images(parsing_out1_075, tf.shape(image_batch_origin)[1:3,]), 90 | tf.image.resize_images(parsing_out1_125, tf.shape(image_batch_origin)[1:3,])]), axis=0) 91 | parsing_out2 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out2_100, tf.shape(image_batch_origin)[1:3,]), 92 | tf.image.resize_images(parsing_out2_075, tf.shape(image_batch_origin)[1:3,]), 93 | tf.image.resize_images(parsing_out2_125, tf.shape(image_batch_origin)[1:3,])]), axis=0) 94 | parsing_out3 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out3_100, tf.shape(image_batch_origin)[1:3,]), 95 | tf.image.resize_images(parsing_out3_075, tf.shape(image_batch_origin)[1:3,]), 96 | tf.image.resize_images(parsing_out3_125, tf.shape(image_batch_origin)[1:3,])]), axis=0) 97 | 98 | raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0) 99 | head_output, tail_output = tf.unstack(raw_output, num=2, axis=0) 100 | tail_list = 
tf.unstack(tail_output, num=20, axis=2) 101 | tail_list_rev = [None] * 20 102 | for xx in range(14): 103 | tail_list_rev[xx] = tail_list[xx] 104 | tail_list_rev[14] = tail_list[15] 105 | tail_list_rev[15] = tail_list[14] 106 | tail_list_rev[16] = tail_list[17] 107 | tail_list_rev[17] = tail_list[16] 108 | tail_list_rev[18] = tail_list[19] 109 | tail_list_rev[19] = tail_list[18] 110 | tail_output_rev = tf.stack(tail_list_rev, axis=2) 111 | tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1])) 112 | 113 | 114 | raw_output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]), axis=0) 115 | raw_output_all = tf.expand_dims(raw_output_all, dim=0) 116 | raw_output_all = tf.argmax(raw_output_all, dimension=3) 117 | pred_all = tf.expand_dims(raw_output_all, dim=3) # Create 4-d tensor. 118 | 119 | # Which variables to load. 120 | restore_var = tf.global_variables() 121 | # Set up tf session and initialize variables. 122 | config = tf.ConfigProto() 123 | config.gpu_options.allow_growth = True 124 | sess = tf.Session(config=config) 125 | init = tf.global_variables_initializer() 126 | 127 | sess.run(init) 128 | sess.run(tf.local_variables_initializer()) 129 | 130 | # Load weights. 131 | loader = tf.train.Saver(var_list=restore_var) 132 | if RESTORE_FROM is not None: 133 | if load(loader, sess, RESTORE_FROM): 134 | print(" [*] Load SUCCESS") 135 | else: 136 | print(" [!] Load failed...") 137 | 138 | # Start queue threads. 139 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 140 | 141 | 142 | # Iterate over training steps. 143 | for step in range(NUM_STEPS): 144 | parsing_ = sess.run(pred_all) 145 | if step % 100 == 0: 146 | print('step {:d}'.format(step)) 147 | print (image_list[step]) 148 | img_split = image_list[step].split('/') 149 | img_id = img_split[-1][:-4] 150 | 151 | msk = decode_labels(parsing_, num_classes=N_CLASSES) 152 | parsing_im = Image.fromarray(msk[0]) 153 | parsing_im.save('{}/{}_vis.png'.format(OUTPUT_DIR, img_id)) 154 | cv2.imwrite('{}/{}.png'.format(OUTPUT_DIR, img_id), parsing_[0,:,:,0]) 155 | 156 | coord.request_stop() 157 | coord.join(threads) 158 | 159 | if __name__ == '__main__': 160 | main() 161 | 162 | 163 | ##############################################################333 164 | -------------------------------------------------------------------------------- /evaluate_pose_JPPNet-s2.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import time 4 | from glob import glob 5 | import tensorflow as tf 6 | import numpy as np 7 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 8 | from utils import * 9 | from LIP_model import * 10 | import matplotlib.pyplot as plt 11 | import scipy.misc 12 | import scipy.io as sio 13 | 14 | 15 | NUM_STEPS = 6 # Number of images in the validation set. 16 | INPUT_SIZE = (384, 384) 17 | N_CLASSES = 20 18 | DATA_DIRECTORY = './datasets/examples' 19 | DATA_LIST_PATH = './datasets/examples/list/val.txt' 20 | RESTORE_FROM = './checkpoint/JPPNet-s2' 21 | OUTPUT_DIR = './output/pose/val' 22 | if not os.path.exists(OUTPUT_DIR): 23 | os.makedirs(OUTPUT_DIR) 24 | 25 | def main(): 26 | """Create the model and start the evaluation process.""" 27 | 28 | # Create queue coordinator. 29 | coord = tf.train.Coordinator() 30 | h, w = INPUT_SIZE 31 | # Load reader. 
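# Each input image is paired with a horizontally flipped copy (image_rev); the
# flipped prediction is later mapped back (left/right joint channels swapped,
# then re-flipped) and averaged with the original as test-time flip augmentation.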
32 | with tf.name_scope("create_inputs"): 33 | reader = ImageReader(DATA_DIRECTORY, DATA_LIST_PATH, None, False, False, coord) 34 | image = reader.image 35 | image_rev = tf.reverse(image, tf.stack([1])) 36 | image_list = reader.image_list 37 | 38 | image_batch_origin = tf.stack([image, image_rev]) 39 | image_batch = tf.image.resize_images(image_batch_origin, [int(h), int(w)]) 40 | image_batch125 = tf.image.resize_images(image_batch_origin, [int(h * 1.25), int(w * 1.25)]) 41 | image_batch075 = tf.image.resize_images(image_batch_origin, [int(h * 0.75), int(w * 0.75)]) 42 | 43 | # Create network. 44 | with tf.variable_scope('', reuse=False): 45 | net_100 = JPPNetModel({'data': image_batch}, is_training=False, n_classes=N_CLASSES) 46 | with tf.variable_scope('', reuse=True): 47 | net_125 = JPPNetModel({'data': image_batch125}, is_training=False, n_classes=N_CLASSES) 48 | with tf.variable_scope('', reuse=True): 49 | net_075 = JPPNetModel({'data': image_batch075}, is_training=False, n_classes=N_CLASSES) 50 | 51 | 52 | # parsing net 53 | parsing_fea1_100 = net_100.layers['res5d_branch2b_parsing'] 54 | parsing_fea1_125 = net_125.layers['res5d_branch2b_parsing'] 55 | parsing_fea1_075 = net_075.layers['res5d_branch2b_parsing'] 56 | 57 | parsing_out1_100 = net_100.layers['fc1_human'] 58 | parsing_out1_125 = net_125.layers['fc1_human'] 59 | parsing_out1_075 = net_075.layers['fc1_human'] 60 | 61 | # pose net 62 | resnet_fea_100 = net_100.layers['res4b22_relu'] 63 | resnet_fea_125 = net_125.layers['res4b22_relu'] 64 | resnet_fea_075 = net_075.layers['res4b22_relu'] 65 | 66 | with tf.variable_scope('', reuse=False): 67 | pose_out1_100, pose_fea1_100 = pose_net(resnet_fea_100, 'fc1_pose') 68 | pose_out2_100, pose_fea2_100 = pose_refine(pose_out1_100, parsing_out1_100, pose_fea1_100, name='fc2_pose') 69 | parsing_out2_100, parsing_fea2_100 = parsing_refine(parsing_out1_100, pose_out1_100, parsing_fea1_100, name='fc2_parsing') 70 | pose_out3_100, pose_fea3_100 = pose_refine(pose_out2_100, parsing_out2_100, pose_fea2_100, name='fc3_pose') 71 | 72 | with tf.variable_scope('', reuse=True): 73 | pose_out1_125, pose_fea1_125 = pose_net(resnet_fea_125, 'fc1_pose') 74 | pose_out2_125, pose_fea2_125 = pose_refine(pose_out1_125, parsing_out1_125, pose_fea1_125, name='fc2_pose') 75 | parsing_out2_125, parsing_fea2_125 = parsing_refine(parsing_out1_125, pose_out1_125, parsing_fea1_125, name='fc2_parsing') 76 | pose_out3_125, pose_fea3_125 = pose_refine(pose_out2_125, parsing_out2_125, pose_fea2_125, name='fc3_pose') 77 | 78 | with tf.variable_scope('', reuse=True): 79 | pose_out1_075, pose_fea1_075 = pose_net(resnet_fea_075, 'fc1_pose') 80 | pose_out2_075, pose_fea2_075 = pose_refine(pose_out1_075, parsing_out1_075, pose_fea1_075, name='fc2_pose') 81 | parsing_out2_075, parsing_fea2_075 = parsing_refine(parsing_out1_075, pose_out1_075, parsing_fea1_075, name='fc2_parsing') 82 | pose_out3_075, pose_fea3_075 = pose_refine(pose_out2_075, parsing_out2_075, pose_fea2_075, name='fc3_pose') 83 | 84 | 85 | pose_out3 = tf.reduce_mean(tf.stack([tf.image.resize_nearest_neighbor(pose_out3_100, tf.shape(image_batch_origin)[1:3,]), 86 | tf.image.resize_nearest_neighbor(pose_out3_125, tf.shape(image_batch_origin)[1:3,]), 87 | tf.image.resize_nearest_neighbor(pose_out3_075, tf.shape(image_batch_origin)[1:3,])]), axis=0) 88 | 89 | head_output, tail_output = tf.unstack(pose_out3, num=2, axis=0) 90 | tail_list = tf.unstack(tail_output, num=16, axis=2) 91 | tail_list_rev = [None] * 16 92 | tail_list_rev[0] = tail_list[5] 93 | 
tail_list_rev[1] = tail_list[4] 94 | tail_list_rev[2] = tail_list[3] 95 | tail_list_rev[3] = tail_list[2] 96 | tail_list_rev[4] = tail_list[1] 97 | tail_list_rev[5] = tail_list[0] 98 | tail_list_rev[10] = tail_list[15] 99 | tail_list_rev[11] = tail_list[14] 100 | tail_list_rev[12] = tail_list[13] 101 | tail_list_rev[13] = tail_list[12] 102 | tail_list_rev[14] = tail_list[11] 103 | tail_list_rev[15] = tail_list[10] 104 | tail_list_rev[6] = tail_list[6] 105 | tail_list_rev[7] = tail_list[7] 106 | tail_list_rev[8] = tail_list[8] 107 | tail_list_rev[9] = tail_list[9] 108 | tail_output_rev = tf.stack(tail_list_rev, axis=2) 109 | tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1])) 110 | 111 | output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]), axis=0) 112 | output_all = tf.expand_dims(output_all, dim=0) 113 | 114 | # Which variables to load. 115 | restore_var = tf.global_variables() 116 | 117 | # Set up tf session and initialize variables. 118 | config = tf.ConfigProto() 119 | config.gpu_options.allow_growth = True 120 | sess = tf.Session(config=config) 121 | init = tf.global_variables_initializer() 122 | 123 | sess.run(init) 124 | sess.run(tf.local_variables_initializer()) 125 | 126 | # Load weights. 127 | loader = tf.train.Saver(var_list=restore_var) 128 | if RESTORE_FROM is not None: 129 | if load(loader, sess, RESTORE_FROM): 130 | print(" [*] Load SUCCESS") 131 | else: 132 | print(" [!] Load failed...") 133 | 134 | # Start queue threads. 135 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 136 | 137 | 138 | # Iterate over training steps. 139 | for step in range(NUM_STEPS): 140 | predict_ = sess.run(output_all) 141 | save_lip_images(image_list[step], predict_, OUTPUT_DIR) 142 | if step % 100 == 0: 143 | print('step {:d}'.format(step)) 144 | print (image_list[step]) 145 | 146 | coord.request_stop() 147 | coord.join(threads) 148 | 149 | 150 | def save_lip_images(image_path, samples, out_dir): 151 | img_A = scipy.misc.imread(image_path).astype(np.float) 152 | rows = img_A.shape[0] 153 | cols = img_A.shape[1] 154 | image = samples[0] 155 | img_split = image_path.split('/') 156 | img_id = img_split[-1][:-4] 157 | with open('{}/{}.txt'.format(out_dir, img_id), 'w') as f: 158 | for p in xrange(image.shape[2]): 159 | channel_ = image[:,:,p] 160 | if channel_.shape[0] != rows or channel_.shape[1] != cols: 161 | print ('sizes do not match...') 162 | channel_ = scipy.misc.imresize(channel_, [rows, cols], interp='nearest') 163 | r_, c_ = np.unravel_index(channel_.argmax(), channel_.shape) 164 | f.write('%d %d ' % (int(c_), int(r_))) 165 | 166 | 167 | if __name__ == '__main__': 168 | main() 169 | 170 | 171 | -------------------------------------------------------------------------------- /get_maximum_square_from_segmented_image.py: -------------------------------------------------------------------------------- 1 | # This function is used to get the largest square from the cropped and segmented image. It can be further used to find patterns 2 | import cv2 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from PIL import Image 6 | import time 7 | from collections import namedtuple 8 | import glob 9 | 10 | def printMaxSubSquare(M): 11 | """" find the largest square """ 12 | R = len(M) # no. of rows in M[][] 13 | C = len(M[0]) # no. 
of columns in M[][] 14 | 15 | S = [[0 for k in range(C)] for l in range(R)] 16 | # here we have set the first row and column of S[][] 17 | 18 | # Construct other entries 19 | for i in range(1, R): 20 | for j in range(1, C): 21 | if (M[i][j] == 1): 22 | S[i][j] = min(S[i][j-1], S[i-1][j], 23 | S[i-1][j-1]) + 1 24 | else: 25 | S[i][j] = 0 26 | 27 | # Find the maximum entry and 28 | # indices of maximum entry in S[][] 29 | max_of_s = S[0][0] 30 | max_i = 0 31 | max_j = 0 32 | for i in range(R): 33 | for j in range(C): 34 | if (max_of_s < S[i][j]): 35 | max_of_s = S[i][j] 36 | max_i = i 37 | max_j = j 38 | 39 | print("Maximum size sub-matrix is: ") 40 | count_i = 0 41 | count_j = 0 42 | position_matrix = [] 43 | for i in range(max_i, max_i - max_of_s, -1): 44 | for j in range(max_j, max_j - max_of_s, -1): 45 | position_matrix.append((i,j)) 46 | count_i+=1 47 | 48 | print('count_i :' + str(count_i)) 49 | print('count_j :' + str(count_j)) 50 | return position_matrix 51 | 52 | 53 | def crop_square_portion(image_file_name): 54 | """" crop and save image """ 55 | image_file_name_list = image_file_name.split('_') 56 | vis_file_name = '_'.join(image_file_name_list[:2])+'_vis.png' 57 | save_file_name = '_'.join(image_file_name_list[:3])+'_square.png' 58 | cloth_type = image_file_name_list[-2] 59 | list_index = cloth_type_list.index(cloth_type) 60 | light_shade = light_shade_list[list_index] 61 | dark_shade = dark_shade_list[list_index] 62 | print(light_shade,dark_shade) 63 | #read input image 64 | img = cv2.imread(INPUT_DIR+vis_file_name,cv2.COLOR_BGR2RGB) 65 | 66 | #detect shades from vis: 67 | hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) 68 | mask = cv2.inRange(hsv, light_shade, dark_shade) 69 | 70 | #coverting to binary array: 71 | np_img = np.array(mask) 72 | np_img[np_img == 255] = 1 73 | 74 | #coverting to binary array: 75 | np_img = np.array(mask) 76 | np_img[np_img == 255] = 1 77 | 78 | #find and plot the largest square 79 | var = printMaxSubSquare(np_img) 80 | for point in var: 81 | a,b = point 82 | pt = (b,a) 83 | cv2.circle(np_img,pt,5,(200,0,0),2) 84 | 85 | ##convert mask to bunary mask 86 | np_img[np_img != 200] = 0 87 | print('final mask shape:') 88 | print(np_img.shape) 89 | 90 | ##crop and save the square image 91 | img = cv2.imread(INPUT_DIR+image_file_name,cv2.COLOR_BGR2RGB) 92 | print('input image shape:') 93 | print(img.shape) 94 | x,y,w,h = cv2.boundingRect(np_img) 95 | crop_img = img[y:y+h,x:x+w] 96 | print('cropped image shape:') 97 | print(crop_img.shape) 98 | cv2.imwrite(OUTPUT_DIR+save_file_name, crop_img) 99 | 100 | 101 | if __name__ == "__main__": 102 | INPUT_DIR = r' set your input folder where segmented images are there' 103 | OUTPUT_DIR = r' set your output images' 104 | cloth_type_list = ['UpperClothes','Dress','Pants','Scarf','Skirt','Coat'] 105 | light_shade_list = [(100, 240, 255),(0,255,70),(0,255,70),(10,150,125),(50,0,70),(10,100,200)] 106 | dark_shade_list = [(190, 255, 255),(0,255,200),(100,255,200),(100,160,130),(60,255,200),(20,255,255)] 107 | 108 | #for each bgcropped file read, pass to crop_image function 109 | for file in glob.glob(INPUT_DIR+'*_cropped.png'): 110 | print(file) 111 | image_file_name = file.split('\\')[-1] 112 | crop_square_portion(image_file_name) 113 | 114 | -------------------------------------------------------------------------------- /kaffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph import GraphBuilder, NodeMapper 2 | from .errors import KaffeError, print_stderr 3 | 4 | 
from . import tensorflow 5 | -------------------------------------------------------------------------------- /kaffe/caffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .resolver import get_caffe_resolver, has_pycaffe 2 | -------------------------------------------------------------------------------- /kaffe/caffe/resolver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | SHARED_CAFFE_RESOLVER = None 4 | 5 | class CaffeResolver(object): 6 | def __init__(self): 7 | self.import_caffe() 8 | 9 | def import_caffe(self): 10 | self.caffe = None 11 | try: 12 | # Try to import PyCaffe first 13 | import caffe 14 | self.caffe = caffe 15 | except ImportError: 16 | # Fall back to the protobuf implementation 17 | from . import caffe_pb2 18 | self.caffepb = caffe_pb2 19 | show_fallback_warning() 20 | if self.caffe: 21 | # Use the protobuf code from the imported distribution. 22 | # This way, Caffe variants with custom layers will work. 23 | self.caffepb = self.caffe.proto.caffe_pb2 24 | self.NetParameter = self.caffepb.NetParameter 25 | 26 | def has_pycaffe(self): 27 | return self.caffe is not None 28 | 29 | def get_caffe_resolver(): 30 | global SHARED_CAFFE_RESOLVER 31 | if SHARED_CAFFE_RESOLVER is None: 32 | SHARED_CAFFE_RESOLVER = CaffeResolver() 33 | return SHARED_CAFFE_RESOLVER 34 | 35 | def has_pycaffe(): 36 | return get_caffe_resolver().has_pycaffe() 37 | 38 | def show_fallback_warning(): 39 | msg = ''' 40 | ------------------------------------------------------------ 41 | WARNING: PyCaffe not found! 42 | Falling back to a pure protocol buffer implementation. 43 | * Conversions will be drastically slower. 44 | * This backend is UNTESTED! 45 | ------------------------------------------------------------ 46 | 47 | ''' 48 | sys.stderr.write(msg) 49 | -------------------------------------------------------------------------------- /kaffe/errors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class KaffeError(Exception): 4 | pass 5 | 6 | def print_stderr(msg): 7 | sys.stderr.write('%s\n' % msg) 8 | -------------------------------------------------------------------------------- /kaffe/graph.py: -------------------------------------------------------------------------------- 1 | from google.protobuf import text_format 2 | 3 | from .caffe import get_caffe_resolver 4 | from .errors import KaffeError, print_stderr 5 | from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch 6 | from .shapes import TensorShape 7 | 8 | class Node(object): 9 | 10 | def __init__(self, name, kind, layer=None): 11 | self.name = name 12 | self.kind = kind 13 | self.layer = LayerAdapter(layer, kind) if layer else None 14 | self.parents = [] 15 | self.children = [] 16 | self.data = None 17 | self.output_shape = None 18 | self.metadata = {} 19 | 20 | def add_parent(self, parent_node): 21 | assert parent_node not in self.parents 22 | self.parents.append(parent_node) 23 | if self not in parent_node.children: 24 | parent_node.children.append(self) 25 | 26 | def add_child(self, child_node): 27 | assert child_node not in self.children 28 | self.children.append(child_node) 29 | if self not in child_node.parents: 30 | child_node.parents.append(self) 31 | 32 | def get_only_parent(self): 33 | if len(self.parents) != 1: 34 | raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' 
% 35 | (self, len(self.parents))) 36 | return self.parents[0] 37 | 38 | @property 39 | def parameters(self): 40 | if self.layer is not None: 41 | return self.layer.parameters 42 | return None 43 | 44 | def __str__(self): 45 | return '[%s] %s' % (self.kind, self.name) 46 | 47 | def __repr__(self): 48 | return '%s (0x%x)' % (self.name, id(self)) 49 | 50 | 51 | class Graph(object): 52 | 53 | def __init__(self, nodes=None, name=None): 54 | self.nodes = nodes or [] 55 | self.node_lut = {node.name: node for node in self.nodes} 56 | self.name = name 57 | 58 | def add_node(self, node): 59 | self.nodes.append(node) 60 | self.node_lut[node.name] = node 61 | 62 | def get_node(self, name): 63 | try: 64 | return self.node_lut[name] 65 | except KeyError: 66 | raise KaffeError('Layer not found: %s' % name) 67 | 68 | def get_input_nodes(self): 69 | return [node for node in self.nodes if len(node.parents) == 0] 70 | 71 | def get_output_nodes(self): 72 | return [node for node in self.nodes if len(node.children) == 0] 73 | 74 | def topologically_sorted(self): 75 | sorted_nodes = [] 76 | unsorted_nodes = list(self.nodes) 77 | temp_marked = set() 78 | perm_marked = set() 79 | 80 | def visit(node): 81 | if node in temp_marked: 82 | raise KaffeError('Graph is not a DAG.') 83 | if node in perm_marked: 84 | return 85 | temp_marked.add(node) 86 | for child in node.children: 87 | visit(child) 88 | perm_marked.add(node) 89 | temp_marked.remove(node) 90 | sorted_nodes.insert(0, node) 91 | 92 | while len(unsorted_nodes): 93 | visit(unsorted_nodes.pop()) 94 | return sorted_nodes 95 | 96 | def compute_output_shapes(self): 97 | sorted_nodes = self.topologically_sorted() 98 | for node in sorted_nodes: 99 | node.output_shape = TensorShape(*NodeKind.compute_output_shape(node)) 100 | 101 | def replaced(self, new_nodes): 102 | return Graph(nodes=new_nodes, name=self.name) 103 | 104 | def transformed(self, transformers): 105 | graph = self 106 | for transformer in transformers: 107 | graph = transformer(graph) 108 | if graph is None: 109 | raise KaffeError('Transformer failed: {}'.format(transformer)) 110 | assert isinstance(graph, Graph) 111 | return graph 112 | 113 | def __contains__(self, key): 114 | return key in self.node_lut 115 | 116 | def __str__(self): 117 | hdr = '{:<20} {:<30} {:>20} {:>20}'.format('Type', 'Name', 'Param', 'Output') 118 | s = [hdr, '-' * 94] 119 | for node in self.topologically_sorted(): 120 | # If the node has learned parameters, display the first one's shape. 121 | # In case of convolutions, this corresponds to the weights. 122 | data_shape = node.data[0].shape if node.data else '--' 123 | out_shape = node.output_shape or '--' 124 | s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape, 125 | tuple(out_shape))) 126 | return '\n'.join(s) 127 | 128 | 129 | class GraphBuilder(object): 130 | '''Constructs a model graph from a Caffe protocol buffer definition.''' 131 | 132 | def __init__(self, def_path, phase='test'): 133 | ''' 134 | def_path: Path to the model definition (.prototxt) 135 | data_path: Path to the model data (.caffemodel) 136 | phase: Either 'test' or 'train'. Used for filtering phase-specific nodes. 
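Only def_path and phase are actual constructor arguments here; the .caffemodel
data referred to by data_path is loaded separately by the transformers.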
137 | ''' 138 | self.def_path = def_path 139 | self.phase = phase 140 | self.load() 141 | 142 | def load(self): 143 | '''Load the layer definitions from the prototxt.''' 144 | self.params = get_caffe_resolver().NetParameter() 145 | with open(self.def_path, 'rb') as def_file: 146 | text_format.Merge(def_file.read(), self.params) 147 | 148 | def filter_layers(self, layers): 149 | '''Filter out layers based on the current phase.''' 150 | phase_map = {0: 'train', 1: 'test'} 151 | filtered_layer_names = set() 152 | filtered_layers = [] 153 | for layer in layers: 154 | phase = self.phase 155 | if len(layer.include): 156 | phase = phase_map[layer.include[0].phase] 157 | if len(layer.exclude): 158 | phase = phase_map[1 - layer.include[0].phase] 159 | exclude = (phase != self.phase) 160 | # Dropout layers appear in a fair number of Caffe 161 | # test-time networks. These are just ignored. We'll 162 | # filter them out here. 163 | if (not exclude) and (phase == 'test'): 164 | exclude = (layer.type == LayerType.Dropout) 165 | if not exclude: 166 | filtered_layers.append(layer) 167 | # Guard against dupes. 168 | assert layer.name not in filtered_layer_names 169 | filtered_layer_names.add(layer.name) 170 | return filtered_layers 171 | 172 | def make_node(self, layer): 173 | '''Create a graph node for the given layer.''' 174 | kind = NodeKind.map_raw_kind(layer.type) 175 | if kind is None: 176 | raise KaffeError('Unknown layer type encountered: %s' % layer.type) 177 | # We want to use the layer's top names (the "output" names), rather than the 178 | # name attribute, which is more of readability thing than a functional one. 179 | # Other layers will refer to a node by its "top name". 180 | return Node(layer.name, kind, layer=layer) 181 | 182 | def make_input_nodes(self): 183 | ''' 184 | Create data input nodes. 185 | 186 | This method is for old-style inputs, where the input specification 187 | was not treated as a first-class layer in the prototext. 188 | Newer models use the "Input layer" type. 189 | ''' 190 | nodes = [Node(name, NodeKind.Data) for name in self.params.input] 191 | if len(nodes): 192 | input_dim = map(int, self.params.input_dim) 193 | if not input_dim: 194 | if len(self.params.input_shape) > 0: 195 | input_dim = map(int, self.params.input_shape[0].dim) 196 | else: 197 | raise KaffeError('Dimensions for input not specified.') 198 | for node in nodes: 199 | node.output_shape = tuple(input_dim) 200 | return nodes 201 | 202 | def build(self): 203 | ''' 204 | Builds the graph from the Caffe layer definitions. 205 | ''' 206 | # Get the layers 207 | layers = self.params.layers or self.params.layer 208 | # Filter out phase-excluded layers 209 | layers = self.filter_layers(layers) 210 | # Get any separately-specified input layers 211 | nodes = self.make_input_nodes() 212 | nodes += [self.make_node(layer) for layer in layers] 213 | # Initialize the graph 214 | graph = Graph(nodes=nodes, name=self.params.name) 215 | # Connect the nodes 216 | # 217 | # A note on layers and outputs: 218 | # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs 219 | # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom 220 | # (in case of in-place operations). Note that the layer's name is not used for establishing 221 | # any connectivity. It's only used for data association. By convention, a layer with a 222 | # single top will often use the same name (although this is not required). 
223 | # 224 | # The current implementation only supports single-output nodes (note that a node can still 225 | # have multiple children, since multiple child nodes can refer to the single top's name). 226 | node_outputs = {} 227 | for layer in layers: 228 | node = graph.get_node(layer.name) 229 | for input_name in layer.bottom: 230 | assert input_name != layer.name 231 | parent_node = node_outputs.get(input_name) 232 | if (parent_node is None) or (parent_node == node): 233 | parent_node = graph.get_node(input_name) 234 | node.add_parent(parent_node) 235 | if len(layer.top)>1: 236 | raise KaffeError('Multiple top nodes are not supported.') 237 | for output_name in layer.top: 238 | if output_name == layer.name: 239 | # Output is named the same as the node. No further action required. 240 | continue 241 | # There are two possibilities here: 242 | # 243 | # Case 1: output_name refers to another node in the graph. 244 | # This is an "in-place operation" that overwrites an existing node. 245 | # This would create a cycle in the graph. We'll undo the in-placing 246 | # by substituting this node wherever the overwritten node is referenced. 247 | # 248 | # Case 2: output_name violates the convention layer.name == output_name. 249 | # Since we are working in the single-output regime, we will can rename it to 250 | # match the layer name. 251 | # 252 | # For both cases, future references to this top re-routes to this node. 253 | node_outputs[output_name] = node 254 | 255 | graph.compute_output_shapes() 256 | return graph 257 | 258 | 259 | class NodeMapper(NodeDispatch): 260 | 261 | def __init__(self, graph): 262 | self.graph = graph 263 | 264 | def map(self): 265 | nodes = self.graph.topologically_sorted() 266 | # Remove input nodes - we'll handle them separately. 267 | input_nodes = self.graph.get_input_nodes() 268 | nodes = [t for t in nodes if t not in input_nodes] 269 | # Decompose DAG into chains. 270 | chains = [] 271 | for node in nodes: 272 | attach_to_chain = None 273 | if len(node.parents) == 1: 274 | parent = node.get_only_parent() 275 | for chain in chains: 276 | if chain[-1] == parent: 277 | # Node is part of an existing chain. 278 | attach_to_chain = chain 279 | break 280 | if attach_to_chain is None: 281 | # Start a new chain for this node. 282 | attach_to_chain = [] 283 | chains.append(attach_to_chain) 284 | attach_to_chain.append(node) 285 | # Map each chain. 
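# map_chain() translates each node in a chain through its kind-specific
# handler; commit() (implemented by the backend subclass) assembles the
# mapped chains into the final output.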
286 | mapped_chains = [] 287 | for chain in chains: 288 | mapped_chains.append(self.map_chain(chain)) 289 | return self.commit(mapped_chains) 290 | 291 | def map_chain(self, chain): 292 | return [self.map_node(node) for node in chain] 293 | 294 | def map_node(self, node): 295 | map_func = self.get_handler(node.kind, 'map') 296 | mapped_node = map_func(node) 297 | assert mapped_node is not None 298 | mapped_node.node = node 299 | return mapped_node 300 | 301 | def commit(self, mapped_chains): 302 | raise NotImplementedError('Must be implemented by subclass.') 303 | -------------------------------------------------------------------------------- /kaffe/layers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numbers 3 | from collections import namedtuple 4 | 5 | from .shapes import * 6 | 7 | LAYER_DESCRIPTORS = { 8 | 9 | # Caffe Types 10 | 'AbsVal': shape_identity, 11 | 'Accuracy': shape_scalar, 12 | 'ArgMax': shape_not_implemented, 13 | 'BatchNorm': shape_identity, 14 | 'BNLL': shape_not_implemented, 15 | 'Concat': shape_concat, 16 | 'ContrastiveLoss': shape_scalar, 17 | 'Convolution': shape_convolution, 18 | 'Deconvolution': shape_not_implemented, 19 | 'Data': shape_data, 20 | 'Dropout': shape_identity, 21 | 'DummyData': shape_data, 22 | 'EuclideanLoss': shape_scalar, 23 | 'Eltwise': shape_identity, 24 | 'Exp': shape_identity, 25 | 'Flatten': shape_not_implemented, 26 | 'HDF5Data': shape_data, 27 | 'HDF5Output': shape_identity, 28 | 'HingeLoss': shape_scalar, 29 | 'Im2col': shape_not_implemented, 30 | 'ImageData': shape_data, 31 | 'InfogainLoss': shape_scalar, 32 | 'InnerProduct': shape_inner_product, 33 | 'Input': shape_data, 34 | 'LRN': shape_identity, 35 | 'MemoryData': shape_mem_data, 36 | 'MultinomialLogisticLoss': shape_scalar, 37 | 'MVN': shape_not_implemented, 38 | 'Pooling': shape_pool, 39 | 'Power': shape_identity, 40 | 'ReLU': shape_identity, 41 | 'Scale': shape_identity, 42 | 'Sigmoid': shape_identity, 43 | 'SigmoidCrossEntropyLoss': shape_scalar, 44 | 'Silence': shape_not_implemented, 45 | 'Softmax': shape_identity, 46 | 'SoftmaxWithLoss': shape_scalar, 47 | 'Split': shape_not_implemented, 48 | 'Slice': shape_not_implemented, 49 | 'TanH': shape_identity, 50 | 'WindowData': shape_not_implemented, 51 | 'Threshold': shape_identity, 52 | 'Interp': shape_not_implemented, 53 | 'SpatialProduct': shape_not_implemented 54 | } 55 | 56 | LAYER_TYPES = LAYER_DESCRIPTORS.keys() 57 | 58 | LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES}) 59 | 60 | class NodeKind(LayerType): 61 | 62 | @staticmethod 63 | def map_raw_kind(kind): 64 | if kind in LAYER_TYPES: 65 | return kind 66 | return None 67 | 68 | @staticmethod 69 | def compute_output_shape(node): 70 | try: 71 | val = LAYER_DESCRIPTORS[node.kind](node) 72 | return val 73 | except NotImplementedError: 74 | raise KaffeError('Output shape computation not implemented for type: %s' % node.kind) 75 | 76 | 77 | class NodeDispatchError(KaffeError): 78 | 79 | pass 80 | 81 | 82 | class NodeDispatch(object): 83 | 84 | @staticmethod 85 | def get_handler_name(node_kind): 86 | if len(node_kind) <= 4: 87 | # A catch-all for things like ReLU and tanh 88 | return node_kind.lower() 89 | # Convert from CamelCase to under_scored 90 | name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', node_kind) 91 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() 92 | 93 | def get_handler(self, node_kind, prefix): 94 | name = self.get_handler_name(node_kind) 95 | name = '_'.join((prefix, name)) 96 | 
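# e.g. a node kind of 'Convolution' with prefix 'map' resolves to the
# method name 'map_convolution'.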
try: 97 | return getattr(self, name) 98 | except AttributeError: 99 | raise NodeDispatchError('No handler found for node kind: %s (expected: %s)' % 100 | (node_kind, name)) 101 | 102 | 103 | class LayerAdapter(object): 104 | 105 | def __init__(self, layer, kind): 106 | self.layer = layer 107 | self.kind = kind 108 | 109 | @property 110 | def parameters(self): 111 | name = NodeDispatch.get_handler_name(self.kind) 112 | name = '_'.join((name, 'param')) 113 | try: 114 | return getattr(self.layer, name) 115 | except AttributeError: 116 | raise NodeDispatchError('Caffe parameters not found for layer kind: %s' % (self.kind)) 117 | 118 | @staticmethod 119 | def get_kernel_value(scalar, repeated, idx, default=None): 120 | if scalar: 121 | return scalar 122 | if repeated: 123 | if isinstance(repeated, numbers.Number): 124 | return repeated 125 | if len(repeated) == 1: 126 | # Same value applies to all spatial dimensions 127 | return int(repeated[0]) 128 | assert idx < len(repeated) 129 | # Extract the value for the given spatial dimension 130 | return repeated[idx] 131 | if default is None: 132 | raise ValueError('Unable to determine kernel parameter!') 133 | return default 134 | 135 | @property 136 | def kernel_parameters(self): 137 | assert self.kind in (NodeKind.Convolution, NodeKind.Pooling) 138 | params = self.parameters 139 | k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0) 140 | k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1) 141 | s_h = self.get_kernel_value(params.stride_h, params.stride, 0, default=1) 142 | s_w = self.get_kernel_value(params.stride_w, params.stride, 1, default=1) 143 | p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0) 144 | p_w = self.get_kernel_value(params.pad_h, params.pad, 1, default=0) 145 | return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w) 146 | 147 | 148 | KernelParameters = namedtuple('KernelParameters', ['kernel_h', 'kernel_w', 'stride_h', 'stride_w', 149 | 'pad_h', 'pad_w']) 150 | -------------------------------------------------------------------------------- /kaffe/shapes.py: -------------------------------------------------------------------------------- 1 | import math 2 | from collections import namedtuple 3 | 4 | from .errors import KaffeError 5 | 6 | TensorShape = namedtuple('TensorShape', ['batch_size', 'channels', 'height', 'width']) 7 | 8 | 9 | def get_filter_output_shape(i_h, i_w, params, round_func): 10 | o_h = (i_h + 2 * params.pad_h - params.kernel_h) / float(params.stride_h) + 1 11 | o_w = (i_w + 2 * params.pad_w - params.kernel_w) / float(params.stride_w) + 1 12 | return (int(round_func(o_h)), int(round_func(o_w))) 13 | 14 | 15 | def get_strided_kernel_output_shape(node, round_func): 16 | assert node.layer is not None 17 | input_shape = node.get_only_parent().output_shape 18 | o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width, 19 | node.layer.kernel_parameters, round_func) 20 | params = node.layer.parameters 21 | has_c_o = hasattr(params, 'num_output') 22 | c = params.num_output if has_c_o else input_shape.channels 23 | return TensorShape(input_shape.batch_size, c, o_h, o_w) 24 | 25 | 26 | def shape_not_implemented(node): 27 | raise NotImplementedError 28 | 29 | 30 | def shape_identity(node): 31 | assert len(node.parents) > 0 32 | return node.parents[0].output_shape 33 | 34 | 35 | def shape_scalar(node): 36 | return TensorShape(1, 1, 1, 1) 37 | 38 | 39 | def shape_data(node): 40 | if node.output_shape: 41 | # Old-style input specification 42 | return 
node.output_shape 43 | try: 44 | # New-style input specification 45 | return map(int, node.parameters.shape[0].dim) 46 | except: 47 | # We most likely have a data layer on our hands. The problem is, 48 | # Caffe infers the dimensions of the data from the source (eg: LMDB). 49 | # We want to avoid reading datasets here. Fail for now. 50 | # This can be temporarily fixed by transforming the data layer to 51 | # Caffe's "input" layer (as is usually used in the "deploy" version). 52 | # TODO: Find a better solution for this. 53 | raise KaffeError('Cannot determine dimensions of data layer.\n' 54 | 'See comments in function shape_data for more info.') 55 | 56 | 57 | def shape_mem_data(node): 58 | params = node.parameters 59 | return TensorShape(params.batch_size, params.channels, params.height, params.width) 60 | 61 | 62 | def shape_concat(node): 63 | axis = node.layer.parameters.axis 64 | output_shape = None 65 | for parent in node.parents: 66 | if output_shape is None: 67 | output_shape = list(parent.output_shape) 68 | else: 69 | output_shape[axis] += parent.output_shape[axis] 70 | return tuple(output_shape) 71 | 72 | 73 | def shape_convolution(node): 74 | return get_strided_kernel_output_shape(node, math.floor) 75 | 76 | 77 | def shape_pool(node): 78 | return get_strided_kernel_output_shape(node, math.ceil) 79 | 80 | 81 | def shape_inner_product(node): 82 | input_shape = node.get_only_parent().output_shape 83 | return TensorShape(input_shape.batch_size, node.layer.parameters.num_output, 1, 1) 84 | -------------------------------------------------------------------------------- /kaffe/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import TensorFlowTransformer 2 | from .network import Network 3 | -------------------------------------------------------------------------------- /kaffe/tensorflow/network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | slim = tf.contrib.slim 4 | 5 | DEFAULT_PADDING = 'SAME' 6 | 7 | 8 | def layer(op): 9 | '''Decorator for composable network layers.''' 10 | 11 | def layer_decorated(self, *args, **kwargs): 12 | # Automatically set a name if not provided. 13 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 14 | # Figure out the layer inputs. 15 | if len(self.terminals) == 0: 16 | raise RuntimeError('No input variables found for layer %s.' % name) 17 | elif len(self.terminals) == 1: 18 | layer_input = self.terminals[0] 19 | else: 20 | layer_input = list(self.terminals) 21 | # Perform the operation and get the output. 22 | layer_output = op(self, layer_input, *args, **kwargs) 23 | # Add to layer LUT. 24 | self.layers[name] = layer_output 25 | # This output is now the input for the next layer. 26 | self.feed(layer_output) 27 | # Return self for chained calls. 
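# Returning self enables the fluent chained style used by the converted models,
# e.g. self.feed('data').conv(...).max_pool(...).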
28 | return self 29 | 30 | return layer_decorated 31 | 32 | 33 | class Network(object): 34 | 35 | def __init__(self, inputs, trainable=True, is_training=False, n_classes=20): 36 | # The input nodes for this network 37 | self.inputs = inputs 38 | # The current list of terminal nodes 39 | self.terminals = [] 40 | # Mapping from layer names to layers 41 | self.layers = dict(inputs) 42 | # If true, the resulting variables are set as trainable 43 | self.trainable = trainable 44 | # Switch variable for dropout 45 | self.use_dropout = tf.placeholder_with_default(tf.constant(1.0), 46 | shape=[], 47 | name='use_dropout') 48 | self.setup(is_training, n_classes) 49 | 50 | def setup(self, is_training, n_classes): 51 | '''Construct the network. ''' 52 | raise NotImplementedError('Must be implemented by the subclass.') 53 | 54 | def load(self, data_path, session, ignore_missing=False): 55 | '''Load network weights. 56 | data_path: The path to the numpy-serialized network weights 57 | session: The current TensorFlow session 58 | ignore_missing: If true, serialized weights for missing layers are ignored. 59 | ''' 60 | data_dict = np.load(data_path).item() 61 | for op_name in data_dict: 62 | with tf.variable_scope(op_name, reuse=True): 63 | for param_name, data in data_dict[op_name].iteritems(): 64 | try: 65 | var = tf.get_variable(param_name) 66 | session.run(var.assign(data)) 67 | except ValueError: 68 | if not ignore_missing: 69 | raise 70 | 71 | def feed(self, *args): 72 | '''Set the input(s) for the next operation by replacing the terminal nodes. 73 | The arguments can be either layer names or the actual layers. 74 | ''' 75 | assert len(args) != 0 76 | self.terminals = [] 77 | for fed_layer in args: 78 | if isinstance(fed_layer, str): 79 | try: 80 | fed_layer = self.layers[fed_layer] 81 | except KeyError: 82 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 83 | self.terminals.append(fed_layer) 84 | return self 85 | 86 | def get_output(self): 87 | '''Returns the current network output.''' 88 | return self.terminals[-1] 89 | 90 | def get_unique_name(self, prefix): 91 | '''Returns an index-suffixed unique name for the given prefix. 92 | This is used for auto-generating layer names based on the type-prefix. 93 | ''' 94 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 95 | return '%s_%d' % (prefix, ident) 96 | 97 | def make_var(self, name, shape): 98 | '''Creates a new TensorFlow variable.''' 99 | return tf.get_variable(name, shape, trainable=self.trainable) 100 | 101 | def validate_padding(self, padding): 102 | '''Verifies that the padding is one of the supported ones.''' 103 | assert padding in ('SAME', 'VALID') 104 | 105 | @layer 106 | def conv(self, 107 | input, 108 | k_h, 109 | k_w, 110 | c_o, 111 | s_h, 112 | s_w, 113 | name, 114 | relu=True, 115 | padding=DEFAULT_PADDING, 116 | group=1, 117 | biased=True): 118 | # Verify that the padding is acceptable 119 | self.validate_padding(padding) 120 | # Get the number of channels in the input 121 | c_i = input.get_shape()[-1] 122 | # Verify that the grouping parameter is valid 123 | assert c_i % group == 0 124 | assert c_o % group == 0 125 | # Convolution for a given input and kernel 126 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 127 | with tf.variable_scope(name) as scope: 128 | kernel = self.make_var('weights', shape=[k_h, k_w, int(c_i) / group, c_o]) 129 | if group == 1: 130 | # This is the common-case. Convolve the input without any further complications. 
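# 'kernel' has shape [k_h, k_w, c_i / group, c_o], the HWIO filter layout
# expected by tf.nn.conv2d.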
131 | output = convolve(input, kernel) 132 | else: 133 | # Split the input into groups and then convolve each of them independently 134 | input_groups = tf.split(3, group, input) 135 | kernel_groups = tf.split(3, group, kernel) 136 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 137 | # Concatenate the groups 138 | output = tf.concat(3, output_groups) 139 | # Add the biases 140 | if biased: 141 | biases = self.make_var('biases', [c_o]) 142 | output = tf.nn.bias_add(output, biases) 143 | if relu: 144 | # ReLU non-linearity 145 | output = tf.nn.relu(output, name=scope.name) 146 | return output 147 | 148 | @layer 149 | def atrous_conv(self, 150 | input, 151 | k_h, 152 | k_w, 153 | c_o, 154 | dilation, 155 | name, 156 | relu=True, 157 | padding=DEFAULT_PADDING, 158 | group=1, 159 | biased=True): 160 | # Verify that the padding is acceptable 161 | self.validate_padding(padding) 162 | # Get the number of channels in the input 163 | c_i = input.get_shape()[-1] 164 | # Verify that the grouping parameter is valid 165 | assert c_i % group == 0 166 | assert c_o % group == 0 167 | # Convolution for a given input and kernel 168 | convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding) 169 | with tf.variable_scope(name) as scope: 170 | kernel = self.make_var('weights', shape=[k_h, k_w, int(c_i) / group, c_o]) 171 | if group == 1: 172 | # This is the common-case. Convolve the input without any further complications. 173 | output = convolve(input, kernel) 174 | else: 175 | # Split the input into groups and then convolve each of them independently 176 | input_groups = tf.split(3, group, input) 177 | kernel_groups = tf.split(3, group, kernel) 178 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 179 | # Concatenate the groups 180 | output = tf.concat(3, output_groups) 181 | # Add the biases 182 | if biased: 183 | biases = self.make_var('biases', [c_o]) 184 | output = tf.nn.bias_add(output, biases) 185 | if relu: 186 | # ReLU non-linearity 187 | output = tf.nn.relu(output, name=scope.name) 188 | return output 189 | 190 | @layer 191 | def relu(self, input, name): 192 | return tf.nn.relu(input, name=name) 193 | 194 | @layer 195 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 196 | self.validate_padding(padding) 197 | return tf.nn.max_pool(input, 198 | ksize=[1, k_h, k_w, 1], 199 | strides=[1, s_h, s_w, 1], 200 | padding=padding, 201 | name=name) 202 | 203 | @layer 204 | def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 205 | self.validate_padding(padding) 206 | return tf.nn.avg_pool(input, 207 | ksize=[1, k_h, k_w, 1], 208 | strides=[1, s_h, s_w, 1], 209 | padding=padding, 210 | name=name) 211 | 212 | @layer 213 | def lrn(self, input, radius, alpha, beta, name, bias=1.0): 214 | return tf.nn.local_response_normalization(input, 215 | depth_radius=radius, 216 | alpha=alpha, 217 | beta=beta, 218 | bias=bias, 219 | name=name) 220 | 221 | @layer 222 | def concat(self, inputs, axis, name): 223 | return tf.concat(concat_dim=axis, values=inputs, name=name) 224 | 225 | @layer 226 | def add(self, inputs, name): 227 | return tf.add_n(inputs, name=name) 228 | 229 | @layer 230 | def fc(self, input, num_out, name, relu=True): 231 | with tf.variable_scope(name) as scope: 232 | input_shape = input.get_shape() 233 | if input_shape.ndims == 4: 234 | # The input is spatial. Vectorize it first. 
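# For example (hypothetical shape), a (N, 7, 7, 512) feature map is flattened below to
# (N, 7*7*512) = (N, 25088) before the fully-connected multiply.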
235 | dim = 1 236 | for d in input_shape[1:].as_list(): 237 | dim *= d 238 | feed_in = tf.reshape(input, [-1, dim]) 239 | else: 240 | feed_in, dim = (input, input_shape[-1].value) 241 | weights = self.make_var('weights', shape=[dim, num_out]) 242 | biases = self.make_var('biases', [num_out]) 243 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 244 | fc = op(feed_in, weights, biases, name=scope.name) 245 | return fc 246 | 247 | @layer 248 | def softmax(self, input, name): 249 | input_shape = map(lambda v: v.value, input.get_shape()) 250 | if len(input_shape) > 2: 251 | # For certain models (like NiN), the singleton spatial dimensions 252 | # need to be explicitly squeezed, since they're not broadcast-able 253 | # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). 254 | if input_shape[1] == 1 and input_shape[2] == 1: 255 | input = tf.squeeze(input, squeeze_dims=[1, 2]) 256 | else: 257 | raise ValueError('Rank 2 tensor input expected for softmax!') 258 | return tf.nn.softmax(input, name) 259 | 260 | @layer 261 | def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True): 262 | with tf.variable_scope(name) as scope: 263 | output = slim.batch_norm( 264 | input, 265 | activation_fn=activation_fn, 266 | is_training=is_training, 267 | updates_collections=None, 268 | scale=scale, 269 | scope=scope) 270 | return output 271 | 272 | @layer 273 | def dropout(self, input, keep_prob, name): 274 | keep = 1 - self.use_dropout + (self.use_dropout * keep_prob) 275 | return tf.nn.dropout(input, keep, name=name) 276 | -------------------------------------------------------------------------------- /kaffe/tensorflow/transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..errors import KaffeError, print_stderr 4 | from ..graph import GraphBuilder, NodeMapper 5 | from ..layers import NodeKind 6 | from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser, 7 | BatchNormScaleBiasFuser, BatchNormPreprocessor, ParameterNamer) 8 | 9 | from . import network 10 | 11 | 12 | def get_padding_type(kernel_params, input_shape, output_shape): 13 | '''Translates Caffe's numeric padding to one of ('SAME', 'VALID'). 14 | Caffe supports arbitrary padding values, while TensorFlow only 15 | supports 'SAME' and 'VALID' modes. So, not all Caffe paddings 16 | can be translated to TensorFlow. There are some subtleties to 17 | how the padding edge-cases are handled. 
These are described here: 18 | https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto 19 | ''' 20 | k_h, k_w, s_h, s_w, p_h, p_w = kernel_params 21 | s_o_h = np.ceil(input_shape.height / float(s_h)) 22 | s_o_w = np.ceil(input_shape.width / float(s_w)) 23 | if (output_shape.height == s_o_h) and (output_shape.width == s_o_w): 24 | return 'SAME' 25 | v_o_h = np.ceil((input_shape.height - k_h + 1.0) / float(s_h)) 26 | v_o_w = np.ceil((input_shape.width - k_w + 1.0) / float(s_w)) 27 | if (output_shape.height == v_o_h) and (output_shape.width == v_o_w): 28 | return 'VALID' 29 | return None 30 | 31 | 32 | class TensorFlowNode(object): 33 | '''An intermediate representation for TensorFlow operations.''' 34 | 35 | def __init__(self, op, *args, **kwargs): 36 | # A string corresponding to the TensorFlow operation 37 | self.op = op 38 | # Positional arguments for the operation 39 | self.args = args 40 | # Keyword arguments for the operation 41 | self.kwargs = list(kwargs.items()) 42 | # The source Caffe node 43 | self.node = None 44 | 45 | def format(self, arg): 46 | '''Returns a string representation for the given value.''' 47 | return "'%s'" % arg if isinstance(arg, basestring) else str(arg) 48 | 49 | def pair(self, key, value): 50 | '''Returns key=formatted(value).''' 51 | return '%s=%s' % (key, self.format(value)) 52 | 53 | def emit(self): 54 | '''Emits the Python source for this node.''' 55 | # Format positional arguments 56 | args = map(self.format, self.args) 57 | # Format any keyword arguments 58 | if self.kwargs: 59 | args += [self.pair(k, v) for k, v in self.kwargs] 60 | # Set the node name 61 | args.append(self.pair('name', self.node.name)) 62 | args = ', '.join(args) 63 | return '%s(%s)' % (self.op, args) 64 | 65 | 66 | class MaybeActivated(object): 67 | 68 | def __init__(self, node, default=True): 69 | self.inject_kwargs = {} 70 | if node.metadata.get('relu', False) != default: 71 | self.inject_kwargs['relu'] = not default 72 | 73 | def __call__(self, *args, **kwargs): 74 | kwargs.update(self.inject_kwargs) 75 | return TensorFlowNode(*args, **kwargs) 76 | 77 | 78 | class TensorFlowMapper(NodeMapper): 79 | 80 | def get_kernel_params(self, node): 81 | kernel_params = node.layer.kernel_parameters 82 | input_shape = node.get_only_parent().output_shape 83 | padding = get_padding_type(kernel_params, input_shape, node.output_shape) 84 | # Only emit the padding if it's not the default value. 85 | padding = {'padding': padding} if padding != network.DEFAULT_PADDING else {} 86 | return (kernel_params, padding) 87 | 88 | def map_convolution(self, node): 89 | (kernel_params, kwargs) = self.get_kernel_params(node) 90 | h = kernel_params.kernel_h 91 | w = kernel_params.kernel_w 92 | c_o = node.output_shape[1] 93 | c_i = node.parents[0].output_shape[1] 94 | group = node.parameters.group 95 | if group != 1: 96 | kwargs['group'] = group 97 | if not node.parameters.bias_term: 98 | kwargs['biased'] = False 99 | assert kernel_params.kernel_h == h 100 | assert kernel_params.kernel_w == w 101 | return MaybeActivated(node)('conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o, 102 | kernel_params.stride_h, kernel_params.stride_w, **kwargs) 103 | 104 | def map_relu(self, node): 105 | return TensorFlowNode('relu') 106 | 107 | def map_pooling(self, node): 108 | pool_type = node.parameters.pool 109 | if pool_type == 0: 110 | pool_op = 'max_pool' 111 | elif pool_type == 1: 112 | pool_op = 'avg_pool' 113 | else: 114 | # Stochastic pooling, for instance. 
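# Caffe's PoolingParameter enum is MAX = 0, AVE = 1, STOCHASTIC = 2; stochastic pooling
# has no counterpart in this emitter, so it is rejected below.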
115 | raise KaffeError('Unsupported pooling type.') 116 | (kernel_params, padding) = self.get_kernel_params(node) 117 | return TensorFlowNode(pool_op, kernel_params.kernel_h, kernel_params.kernel_w, 118 | kernel_params.stride_h, kernel_params.stride_w, **padding) 119 | 120 | def map_inner_product(self, node): 121 | #TODO: Axis 122 | assert node.parameters.axis == 1 123 | #TODO: Unbiased 124 | assert node.parameters.bias_term == True 125 | return MaybeActivated(node)('fc', node.parameters.num_output) 126 | 127 | def map_softmax(self, node): 128 | return TensorFlowNode('softmax') 129 | 130 | def map_lrn(self, node): 131 | params = node.parameters 132 | # The window size must be an odd value. For a window 133 | # size of (2*n+1), TensorFlow defines depth_radius = n. 134 | assert params.local_size % 2 == 1 135 | # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow 136 | # just scales by alpha (as does Krizhevsky's paper). 137 | # We'll account for that here. 138 | alpha = params.alpha / float(params.local_size) 139 | return TensorFlowNode('lrn', int(params.local_size / 2), alpha, params.beta) 140 | 141 | def map_concat(self, node): 142 | axis = (2, 3, 1, 0)[node.parameters.axis] 143 | return TensorFlowNode('concat', axis) 144 | 145 | def map_dropout(self, node): 146 | return TensorFlowNode('dropout', node.parameters.dropout_ratio) 147 | 148 | def map_batch_norm(self, node): 149 | scale_offset = len(node.data) == 4 150 | kwargs = {'is_training': True} if scale_offset else {'is_training': True, 'scale': False} 151 | return MaybeActivated(node, default=False)('batch_normalization', **kwargs) 152 | 153 | def map_eltwise(self, node): 154 | operations = {0: 'multiply', 1: 'add', 2: 'max'} 155 | op_code = node.parameters.operation 156 | try: 157 | return TensorFlowNode(operations[op_code]) 158 | except KeyError: 159 | raise KaffeError('Unknown elementwise operation: {}'.format(op_code)) 160 | 161 | def commit(self, chains): 162 | return chains 163 | 164 | 165 | class TensorFlowEmitter(object): 166 | 167 | def __init__(self, tab=None): 168 | self.tab = tab or ' ' * 4 169 | self.prefix = '' 170 | 171 | def indent(self): 172 | self.prefix += self.tab 173 | 174 | def outdent(self): 175 | self.prefix = self.prefix[:-len(self.tab)] 176 | 177 | def statement(self, s): 178 | return self.prefix + s + '\n' 179 | 180 | def emit_imports(self): 181 | return self.statement('from kaffe.tensorflow import Network\n') 182 | 183 | def emit_class_def(self, name): 184 | return self.statement('class %s(Network):' % (name)) 185 | 186 | def emit_setup_def(self): 187 | return self.statement('def setup(self):') 188 | 189 | def emit_parents(self, chain): 190 | assert len(chain) 191 | s = '(self.feed(' 192 | sep = ', \n' + self.prefix + (' ' * len(s)) 193 | s += sep.join(["'%s'" % parent.name for parent in chain[0].node.parents]) 194 | return self.statement(s + ')') 195 | 196 | def emit_node(self, node): 197 | return self.statement(' ' * 5 + '.' 
+ node.emit()) 198 | 199 | def emit(self, name, chains): 200 | s = self.emit_imports() 201 | s += self.emit_class_def(name) 202 | self.indent() 203 | s += self.emit_setup_def() 204 | self.indent() 205 | blocks = [] 206 | for chain in chains: 207 | b = '' 208 | b += self.emit_parents(chain) 209 | for node in chain: 210 | b += self.emit_node(node) 211 | blocks.append(b[:-1] + ')') 212 | s = s + '\n\n'.join(blocks) 213 | return s 214 | 215 | 216 | class TensorFlowTransformer(object): 217 | 218 | def __init__(self, def_path, data_path, verbose=True, phase='test'): 219 | self.verbose = verbose 220 | self.phase = phase 221 | self.load(def_path, data_path, phase) 222 | self.params = None 223 | self.source = None 224 | 225 | def load(self, def_path, data_path, phase): 226 | # Build the graph 227 | graph = GraphBuilder(def_path, phase).build() 228 | 229 | if data_path is not None: 230 | # Load and associate learned parameters 231 | graph = DataInjector(def_path, data_path)(graph) 232 | 233 | # Transform the graph 234 | transformers = [ 235 | # Fuse split batch normalization layers 236 | BatchNormScaleBiasFuser(), 237 | 238 | # Fuse ReLUs 239 | # TODO: Move non-linearity application to layer wrapper, allowing 240 | # any arbitrary operation to be optionally activated. 241 | ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct, 242 | NodeKind.BatchNorm]), 243 | 244 | # Rename nodes 245 | # Slashes are used for scoping in TensorFlow. Replace slashes 246 | # in node names with underscores. 247 | # (Caffe's GoogLeNet implementation uses slashes) 248 | NodeRenamer(lambda node: node.name.replace('/', '_')) 249 | ] 250 | self.graph = graph.transformed(transformers) 251 | 252 | # Display the graph 253 | if self.verbose: 254 | print_stderr(self.graph) 255 | 256 | def transform_data(self): 257 | if self.params is None: 258 | transformers = [ 259 | 260 | # Reshape the parameters to TensorFlow's ordering 261 | DataReshaper({ 262 | # (c_o, c_i, h, w) -> (h, w, c_i, c_o) 263 | NodeKind.Convolution: (2, 3, 1, 0), 264 | 265 | # (c_o, c_i) -> (c_i, c_o) 266 | NodeKind.InnerProduct: (1, 0) 267 | }), 268 | 269 | # Pre-process batch normalization data 270 | BatchNormPreprocessor(), 271 | 272 | # Convert parameters to dictionaries 273 | ParameterNamer(), 274 | ] 275 | self.graph = self.graph.transformed(transformers) 276 | self.params = {node.name: node.data for node in self.graph.nodes if node.data} 277 | return self.params 278 | 279 | def transform_source(self): 280 | if self.source is None: 281 | mapper = TensorFlowMapper(self.graph) 282 | chains = mapper.map() 283 | emitter = TensorFlowEmitter() 284 | self.source = emitter.emit(self.graph.name, chains) 285 | return self.source 286 | -------------------------------------------------------------------------------- /kaffe/transformers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A collection of graph transforms. 3 | 4 | A transformer is a callable that accepts a graph and returns a transformed version. 5 | ''' 6 | 7 | import numpy as np 8 | 9 | from .caffe import get_caffe_resolver, has_pycaffe 10 | from .errors import KaffeError, print_stderr 11 | from .layers import NodeKind 12 | 13 | 14 | class DataInjector(object): 15 | ''' 16 | Associates parameters loaded from a .caffemodel file with their corresponding nodes. 
17 | ''' 18 | 19 | def __init__(self, def_path, data_path): 20 | # The .prototxt file defining the graph 21 | self.def_path = def_path 22 | # The .caffemodel file containing the learned parameters 23 | self.data_path = data_path 24 | # Set to true if the fallback protocol-buffer based backend was used 25 | self.did_use_pb = False 26 | # A list containing (layer name, parameters) tuples 27 | self.params = None 28 | # Load the parameters 29 | self.load() 30 | 31 | def load(self): 32 | if has_pycaffe(): 33 | self.load_using_caffe() 34 | else: 35 | self.load_using_pb() 36 | 37 | def load_using_caffe(self): 38 | caffe = get_caffe_resolver().caffe 39 | net = caffe.Net(self.def_path, self.data_path, caffe.TEST) 40 | data = lambda blob: blob.data 41 | self.params = [(k, map(data, v)) for k, v in net.params.items()] 42 | 43 | def load_using_pb(self): 44 | data = get_caffe_resolver().NetParameter() 45 | data.MergeFromString(open(self.data_path, 'rb').read()) 46 | pair = lambda layer: (layer.name, self.normalize_pb_data(layer)) 47 | layers = data.layers or data.layer 48 | self.params = [pair(layer) for layer in layers if layer.blobs] 49 | self.did_use_pb = True 50 | 51 | def normalize_pb_data(self, layer): 52 | transformed = [] 53 | for blob in layer.blobs: 54 | if len(blob.shape.dim): 55 | dims = blob.shape.dim 56 | c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims)) 57 | else: 58 | c_o = blob.num 59 | c_i = blob.channels 60 | h = blob.height 61 | w = blob.width 62 | data = np.array(blob.data, dtype=np.float32).reshape(c_o, c_i, h, w) 63 | transformed.append(data) 64 | return transformed 65 | 66 | def adjust_parameters(self, node, data): 67 | if not self.did_use_pb: 68 | return data 69 | # When using the protobuf-backend, each parameter initially has four dimensions. 70 | # In certain cases (like FC layers), we want to eliminate the singleton dimensions. 71 | # This implementation takes care of the common cases. However, it does leave the 72 | # potential for future issues. 73 | # The Caffe-backend does not suffer from this problem. 74 | data = list(data) 75 | squeeze_indices = [1] # Squeeze biases. 76 | if node.kind == NodeKind.InnerProduct: 77 | squeeze_indices.append(0) # Squeeze FC. 78 | for idx in squeeze_indices: 79 | data[idx] = np.squeeze(data[idx]) 80 | return data 81 | 82 | def __call__(self, graph): 83 | for layer_name, data in self.params: 84 | if layer_name in graph: 85 | node = graph.get_node(layer_name) 86 | node.data = self.adjust_parameters(node, data) 87 | else: 88 | print_stderr('Ignoring parameters for non-existent layer: %s' % layer_name) 89 | return graph 90 | 91 | 92 | class DataReshaper(object): 93 | 94 | def __init__(self, mapping, replace=True): 95 | # A dictionary mapping NodeKind to the transposed order. 96 | self.mapping = mapping 97 | # The node kinds eligible for reshaping 98 | self.reshaped_node_types = self.mapping.keys() 99 | # If true, the reshaped data will replace the old one. 100 | # Otherwise, it's set to the reshaped_data attribute. 
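# For reference, the TensorFlow transformer in this repository passes
#   DataReshaper({NodeKind.Convolution: (2, 3, 1, 0),   # (c_o, c_i, h, w) -> (h, w, c_i, c_o)
#                 NodeKind.InnerProduct: (1, 0)})       # (c_o, c_i) -> (c_i, c_o)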
101 | self.replace = replace 102 | 103 | def has_spatial_parent(self, node): 104 | try: 105 | parent = node.get_only_parent() 106 | s = parent.output_shape 107 | return s.height > 1 or s.width > 1 108 | except KaffeError: 109 | return False 110 | 111 | def map(self, node_kind): 112 | try: 113 | return self.mapping[node_kind] 114 | except KeyError: 115 | raise KaffeError('Ordering not found for node kind: {}'.format(node_kind)) 116 | 117 | def __call__(self, graph): 118 | for node in graph.nodes: 119 | if node.data is None: 120 | continue 121 | if node.kind not in self.reshaped_node_types: 122 | # Check for 2+ dimensional data 123 | if any(len(tensor.shape) > 1 for tensor in node.data): 124 | print_stderr('Warning: parmaters not reshaped for node: {}'.format(node)) 125 | continue 126 | transpose_order = self.map(node.kind) 127 | weights = node.data[0] 128 | if (node.kind == NodeKind.InnerProduct) and self.has_spatial_parent(node): 129 | # The FC layer connected to the spatial layer needs to be 130 | # re-wired to match the new spatial ordering. 131 | in_shape = node.get_only_parent().output_shape 132 | fc_shape = weights.shape 133 | output_channels = fc_shape[0] 134 | weights = weights.reshape((output_channels, in_shape.channels, in_shape.height, 135 | in_shape.width)) 136 | weights = weights.transpose(self.map(NodeKind.Convolution)) 137 | node.reshaped_data = weights.reshape(fc_shape[transpose_order[0]], 138 | fc_shape[transpose_order[1]]) 139 | else: 140 | node.reshaped_data = weights.transpose(transpose_order) 141 | 142 | if self.replace: 143 | for node in graph.nodes: 144 | if hasattr(node, 'reshaped_data'): 145 | # Set the weights 146 | node.data[0] = node.reshaped_data 147 | del node.reshaped_data 148 | return graph 149 | 150 | 151 | class SubNodeFuser(object): 152 | ''' 153 | An abstract helper for merging a single-child with its single-parent. 154 | ''' 155 | 156 | def __call__(self, graph): 157 | nodes = graph.nodes 158 | fused_nodes = [] 159 | for node in nodes: 160 | if len(node.parents) != 1: 161 | # We're only fusing nodes with single parents 162 | continue 163 | parent = node.get_only_parent() 164 | if len(parent.children) != 1: 165 | # We can only fuse a node if its parent's 166 | # value isn't used by any other node. 167 | continue 168 | if not self.is_eligible_pair(parent, node): 169 | continue 170 | # Rewrite the fused node's children to its parent. 171 | for child in node.children: 172 | child.parents.remove(node) 173 | parent.add_child(child) 174 | # Disconnect the fused node from the graph. 175 | parent.children.remove(node) 176 | fused_nodes.append(node) 177 | # Let the sub-class merge the fused node in any arbitrary way. 178 | self.merge(parent, node) 179 | transformed_nodes = [node for node in nodes if node not in fused_nodes] 180 | return graph.replaced(transformed_nodes) 181 | 182 | def is_eligible_pair(self, parent, child): 183 | '''Returns true if this parent/child pair is eligible for fusion.''' 184 | raise NotImplementedError('Must be implemented by subclass.') 185 | 186 | def merge(self, parent, child): 187 | '''Merge the child node into the parent.''' 188 | raise NotImplementedError('Must be implemented by subclass') 189 | 190 | 191 | class ReLUFuser(SubNodeFuser): 192 | ''' 193 | Fuses rectified linear units with their parent nodes. 194 | ''' 195 | 196 | def __init__(self, allowed_parent_types=None): 197 | # Fuse ReLUs when the parent node is one of the given types. 198 | # If None, all node types are eligible. 
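# For example, the TensorFlow transformer constructs
#   ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct, NodeKind.BatchNorm])
# so a trailing ReLU is folded into its producer and applied inside that layer instead of
# being emitted as a separate node.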
199 | self.allowed_parent_types = allowed_parent_types 200 | 201 | def is_eligible_pair(self, parent, child): 202 | return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and 203 | child.kind == NodeKind.ReLU) 204 | 205 | def merge(self, parent, _): 206 | parent.metadata['relu'] = True 207 | 208 | 209 | class BatchNormScaleBiasFuser(SubNodeFuser): 210 | ''' 211 | The original batch normalization paper includes two learned 212 | parameters: a scaling factor \gamma and a bias \beta. 213 | Caffe's implementation does not include these two. However, it is commonly 214 | replicated by adding a scaling+bias layer immidiately after the batch norm. 215 | 216 | This fuser merges the scaling+bias layer with the batch norm. 217 | ''' 218 | 219 | def is_eligible_pair(self, parent, child): 220 | return (parent.kind == NodeKind.BatchNorm and child.kind == NodeKind.Scale and 221 | child.parameters.axis == 1 and child.parameters.bias_term == True) 222 | 223 | def merge(self, parent, child): 224 | parent.scale_bias_node = child 225 | 226 | 227 | class BatchNormPreprocessor(object): 228 | ''' 229 | Prescale batch normalization parameters. 230 | Concatenate gamma (scale) and beta (bias) terms if set. 231 | ''' 232 | 233 | def __call__(self, graph): 234 | for node in graph.nodes: 235 | if node.kind != NodeKind.BatchNorm: 236 | continue 237 | assert node.data is not None 238 | assert len(node.data) == 3 239 | mean, variance, scale = node.data 240 | # Prescale the stats 241 | scaling_factor = 1.0 / scale if scale != 0 else 0 242 | mean *= scaling_factor 243 | variance *= scaling_factor 244 | # Replace with the updated values 245 | node.data = [mean, variance] 246 | if hasattr(node, 'scale_bias_node'): 247 | # Include the scale and bias terms 248 | gamma, beta = node.scale_bias_node.data 249 | node.data += [gamma, beta] 250 | return graph 251 | 252 | 253 | class NodeRenamer(object): 254 | ''' 255 | Renames nodes in the graph using a given unary function that 256 | accepts a node and returns its new name. 257 | ''' 258 | 259 | def __init__(self, renamer): 260 | self.renamer = renamer 261 | 262 | def __call__(self, graph): 263 | for node in graph.nodes: 264 | node.name = self.renamer(node) 265 | return graph 266 | 267 | 268 | class ParameterNamer(object): 269 | ''' 270 | Convert layer data arrays to a dictionary mapping parameter names to their values. 
271 | ''' 272 | 273 | def __call__(self, graph): 274 | for node in graph.nodes: 275 | if node.data is None: 276 | continue 277 | if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct): 278 | names = ('weights',) 279 | if node.parameters.bias_term: 280 | names += ('biases',) 281 | elif node.kind == NodeKind.BatchNorm: 282 | names = ('moving_mean', 'moving_variance') 283 | if len(node.data) == 4: 284 | names += ('gamma', 'beta') 285 | else: 286 | print_stderr('WARNING: Unhandled parameters: {}'.format(node.kind)) 287 | continue 288 | assert len(names) == len(node.data) 289 | node.data = dict(zip(names, node.data)) 290 | return graph 291 | -------------------------------------------------------------------------------- /train_JPPNet-s2.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import time 4 | import tensorflow as tf 5 | import numpy as np 6 | import random 7 | from utils import * 8 | from LIP_model import * 9 | 10 | # Set gpus 11 | gpus = [0] # Here I set CUDA to only see one GPU 12 | os.environ["CUDA_VISIBLE_DEVICES"]=','.join([str(i) for i in gpus]) 13 | num_gpus = len(gpus) # number of GPUs to use 14 | 15 | ### parameters setting 16 | N_CLASSES = 20 17 | INPUT_SIZE = (384, 384) 18 | BATCH_SIZE = 1 19 | BATCH_I = 1 20 | SHUFFLE = True 21 | RANDOM_SCALE = True 22 | RANDOM_MIRROR = True 23 | LEARNING_RATE = 1e-4 24 | MOMENTUM = 0.9 25 | POWER = 0.9 26 | NUM_STEPS = 7616 * 35 + 1 27 | SAVE_PRED_EVERY = 7616 28 | p_Weight = 1 29 | s_Weight = 1 30 | DATA_DIR = './datasets/lip' 31 | LIST_PATH = './datasets/lip/list/train_rev.txt' 32 | DATA_ID_LIST = './datasets/lip/list/train_id.txt' 33 | SNAPSHOT_DIR = './checkpoint/JPPNet-s2' 34 | LOG_DIR = './logs/JPPNet-s2' 35 | 36 | 37 | def main(): 38 | RANDOM_SEED = random.randint(1000, 9999) 39 | tf.set_random_seed(RANDOM_SEED) 40 | 41 | # Create queue coordinator. 42 | coord = tf.train.Coordinator() 43 | h, w = INPUT_SIZE 44 | # Load reader. 45 | with tf.name_scope("create_inputs"): 46 | reader = LIPReader(DATA_DIR, LIST_PATH, DATA_ID_LIST, INPUT_SIZE, RANDOM_SCALE, RANDOM_MIRROR, SHUFFLE, coord) 47 | image_batch, label_batch, heatmap_batch = reader.dequeue(BATCH_SIZE) 48 | image_batch075 = tf.image.resize_images(image_batch, [int(h * 0.75), int(w * 0.75)]) 49 | image_batch050 = tf.image.resize_images(image_batch, [int(h * 0.5), int(w * 0.5)]) 50 | heatmap_batch = tf.scalar_mul(1.0/255, heatmap_batch) 51 | 52 | tower_grads = [] 53 | reuse1 = False 54 | reuse2 = False 55 | # Define loss and optimisation parameters. 56 | base_lr = tf.constant(LEARNING_RATE) 57 | step_ph = tf.placeholder(dtype=tf.float32, shape=()) 58 | learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / NUM_STEPS), POWER)) 59 | optim = tf.train.MomentumOptimizer(learning_rate, MOMENTUM) 60 | 61 | for i in xrange (num_gpus): 62 | with tf.device('/gpu:%d' % i): 63 | with tf.name_scope('Tower_%d' % (i)) as scope: 64 | if i == 0: 65 | reuse1 = False 66 | reuse2 = True 67 | else: 68 | reuse1 = True 69 | reuse2 = True 70 | next_image = image_batch[i*BATCH_I:(i+1)*BATCH_I,:] 71 | next_image075 = image_batch075[i*BATCH_I:(i+1)*BATCH_I,:] 72 | next_image050 = image_batch050[i*BATCH_I:(i+1)*BATCH_I,:] 73 | next_heatmap = heatmap_batch[i*BATCH_I:(i+1)*BATCH_I,:] 74 | next_label = label_batch[i*BATCH_I:(i+1)*BATCH_I,:] 75 | 76 | # Create network. 
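# The backbone is instantiated at three input scales (1.0, 0.75 and 0.5); reuse1/reuse2
# control tf.variable_scope reuse so that all scales, and all GPU towers, share one set of weights.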
77 | with tf.variable_scope('', reuse=reuse1): 78 | net_100 = JPPNetModel({'data': next_image}, is_training=False, n_classes=N_CLASSES) 79 | with tf.variable_scope('', reuse=reuse2): 80 | net_075 = JPPNetModel({'data': next_image075}, is_training=False, n_classes=N_CLASSES) 81 | with tf.variable_scope('', reuse=reuse2): 82 | net_050 = JPPNetModel({'data': next_image050}, is_training=False, n_classes=N_CLASSES) 83 | 84 | # parsing net 85 | parsing_fea1_100 = net_100.layers['res5d_branch2b_parsing'] 86 | parsing_fea1_075 = net_075.layers['res5d_branch2b_parsing'] 87 | parsing_fea1_050 = net_050.layers['res5d_branch2b_parsing'] 88 | 89 | parsing_out1_100 = net_100.layers['fc1_human'] 90 | parsing_out1_075 = net_075.layers['fc1_human'] 91 | parsing_out1_050 = net_050.layers['fc1_human'] 92 | # pose net 93 | resnet_fea_100 = net_100.layers['res4b22_relu'] 94 | resnet_fea_075 = net_075.layers['res4b22_relu'] 95 | resnet_fea_050 = net_050.layers['res4b22_relu'] 96 | 97 | with tf.variable_scope('', reuse=reuse1): 98 | pose_out1_100, pose_fea1_100 = pose_net(resnet_fea_100, 'fc1_pose') 99 | pose_out2_100, pose_fea2_100 = pose_refine(pose_out1_100, parsing_out1_100, pose_fea1_100, name='fc2_pose') 100 | parsing_out2_100, parsing_fea2_100 = parsing_refine(parsing_out1_100, pose_out1_100, parsing_fea1_100, name='fc2_parsing') 101 | parsing_out3_100, parsing_fea3_100 = parsing_refine(parsing_out2_100, pose_out2_100, parsing_fea2_100, name='fc3_parsing') 102 | pose_out3_100, pose_fea3_100 = pose_refine(pose_out2_100, parsing_out2_100, pose_fea2_100, name='fc3_pose') 103 | 104 | with tf.variable_scope('', reuse=reuse2): 105 | pose_out1_075, pose_fea1_075 = pose_net(resnet_fea_075, 'fc1_pose') 106 | pose_out2_075, pose_fea2_075 = pose_refine(pose_out1_075, parsing_out1_075, pose_fea1_075, name='fc2_pose') 107 | parsing_out2_075, parsing_fea2_075 = parsing_refine(parsing_out1_075, pose_out1_075, parsing_fea1_075, name='fc2_parsing') 108 | parsing_out3_075, parsing_fea3_075 = parsing_refine(parsing_out2_075, pose_out2_075, parsing_fea2_075, name='fc3_parsing') 109 | pose_out3_075, pose_fea3_075 = pose_refine(pose_out2_075, parsing_out2_075, pose_fea2_075, name='fc3_pose') 110 | 111 | with tf.variable_scope('', reuse=reuse2): 112 | pose_out1_050, pose_fea1_050 = pose_net(resnet_fea_050, 'fc1_pose') 113 | pose_out2_050, pose_fea2_050 = pose_refine(pose_out1_050, parsing_out1_050, pose_fea1_050, name='fc2_pose') 114 | parsing_out2_050, parsing_fea2_050 = parsing_refine(parsing_out1_050, pose_out1_050, parsing_fea1_050, name='fc2_parsing') 115 | parsing_out3_050, parsing_fea3_050 = parsing_refine(parsing_out2_050, pose_out2_050, parsing_fea2_050, name='fc3_parsing') 116 | pose_out3_050, pose_fea3_050 = pose_refine(pose_out2_050, parsing_out2_050, pose_fea2_050, name='fc3_pose') 117 | 118 | # combine resize 119 | parsing_out1 = tf.reduce_mean(tf.stack([parsing_out1_100, 120 | tf.image.resize_images(parsing_out1_075, tf.shape(parsing_out1_100)[1:3,]), 121 | tf.image.resize_images(parsing_out1_050, tf.shape(parsing_out1_100)[1:3,])]), axis=0) 122 | parsing_out2 = tf.reduce_mean(tf.stack([parsing_out2_100, 123 | tf.image.resize_images(parsing_out2_075, tf.shape(parsing_out2_100)[1:3,]), 124 | tf.image.resize_images(parsing_out2_050, tf.shape(parsing_out2_100)[1:3,])]), axis=0) 125 | parsing_out3 = tf.reduce_mean(tf.stack([parsing_out3_100, 126 | tf.image.resize_images(parsing_out3_075, tf.shape(parsing_out3_100)[1:3,]), 127 | tf.image.resize_images(parsing_out3_050, tf.shape(parsing_out3_100)[1:3,])]), axis=0) 128 
| pose_out1 = tf.reduce_mean(tf.stack([pose_out1_100, 129 | tf.image.resize_nearest_neighbor(pose_out1_075, tf.shape(pose_out1_100)[1:3,]), 130 | tf.image.resize_nearest_neighbor(pose_out1_050, tf.shape(pose_out1_100)[1:3,])]), axis=0) 131 | pose_out2 = tf.reduce_mean(tf.stack([pose_out2_100, 132 | tf.image.resize_nearest_neighbor(pose_out2_075, tf.shape(pose_out2_100)[1:3,]), 133 | tf.image.resize_nearest_neighbor(pose_out2_050, tf.shape(pose_out2_100)[1:3,])]), axis=0) 134 | pose_out3 = tf.reduce_mean(tf.stack([pose_out3_100, 135 | tf.image.resize_nearest_neighbor(pose_out3_075, tf.shape(pose_out3_100)[1:3,]), 136 | tf.image.resize_nearest_neighbor(pose_out3_050, tf.shape(pose_out3_100)[1:3,])]), axis=0) 137 | 138 | ### Predictions: ignoring all predictions with labels greater or equal than n_classes 139 | raw_prediction_p1 = tf.reshape(parsing_out1, [-1, N_CLASSES]) 140 | raw_prediction_p1_100 = tf.reshape(parsing_out1_100, [-1, N_CLASSES]) 141 | raw_prediction_p1_075 = tf.reshape(parsing_out1_075, [-1, N_CLASSES]) 142 | raw_prediction_p1_050 = tf.reshape(parsing_out1_050, [-1, N_CLASSES]) 143 | 144 | raw_prediction_p2 = tf.reshape(parsing_out2, [-1, N_CLASSES]) 145 | raw_prediction_p2_100 = tf.reshape(parsing_out2_100, [-1, N_CLASSES]) 146 | raw_prediction_p2_075 = tf.reshape(parsing_out2_075, [-1, N_CLASSES]) 147 | raw_prediction_p2_050 = tf.reshape(parsing_out2_050, [-1, N_CLASSES]) 148 | 149 | raw_prediction_p3 = tf.reshape(parsing_out3, [-1, N_CLASSES]) 150 | raw_prediction_p3_100 = tf.reshape(parsing_out3_100, [-1, N_CLASSES]) 151 | raw_prediction_p3_075 = tf.reshape(parsing_out3_075, [-1, N_CLASSES]) 152 | raw_prediction_p3_050 = tf.reshape(parsing_out3_050, [-1, N_CLASSES]) 153 | 154 | label_proc = prepare_label(next_label, tf.stack(parsing_out1.get_shape()[1:3]), one_hot=False) # [batch_size, h, w] 155 | label_proc075 = prepare_label(next_label, tf.stack(parsing_out1_075.get_shape()[1:3]), one_hot=False) 156 | label_proc050 = prepare_label(next_label, tf.stack(parsing_out1_050.get_shape()[1:3]), one_hot=False) 157 | 158 | raw_gt = tf.reshape(label_proc, [-1,]) 159 | raw_gt075 = tf.reshape(label_proc075, [-1,]) 160 | raw_gt050 = tf.reshape(label_proc050, [-1,]) 161 | 162 | indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, N_CLASSES - 1)), 1) 163 | indices075 = tf.squeeze(tf.where(tf.less_equal(raw_gt075, N_CLASSES - 1)), 1) 164 | indices050 = tf.squeeze(tf.where(tf.less_equal(raw_gt050, N_CLASSES - 1)), 1) 165 | 166 | gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) 167 | gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32) 168 | gt050 = tf.cast(tf.gather(raw_gt050, indices050), tf.int32) 169 | 170 | prediction_p1 = tf.gather(raw_prediction_p1, indices) 171 | prediction_p1_100 = tf.gather(raw_prediction_p1_100, indices) 172 | prediction_p1_075 = tf.gather(raw_prediction_p1_075, indices075) 173 | prediction_p1_050 = tf.gather(raw_prediction_p1_050, indices050) 174 | 175 | prediction_p2 = tf.gather(raw_prediction_p2, indices) 176 | prediction_p2_100 = tf.gather(raw_prediction_p2_100, indices) 177 | prediction_p2_075 = tf.gather(raw_prediction_p2_075, indices075) 178 | prediction_p2_050 = tf.gather(raw_prediction_p2_050, indices050) 179 | 180 | prediction_p3 = tf.gather(raw_prediction_p3, indices) 181 | prediction_p3_100 = tf.gather(raw_prediction_p3_100, indices) 182 | prediction_p3_075 = tf.gather(raw_prediction_p3_075, indices075) 183 | prediction_p3_050 = tf.gather(raw_prediction_p3_050, indices050) 184 | 185 | next_heatmap075 = 
tf.image.resize_nearest_neighbor(next_heatmap, pose_out1_075.get_shape()[1:3]) 186 | next_heatmap050 = tf.image.resize_nearest_neighbor(next_heatmap, pose_out1_050.get_shape()[1:3]) 187 | 188 | ### Pixel-wise softmax loss. 189 | loss_p1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p1, labels=gt)) 190 | loss_p1_100 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p1_100, labels=gt)) 191 | loss_p1_075 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p1_075, labels=gt075)) 192 | loss_p1_050 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p1_050, labels=gt050)) 193 | 194 | loss_p2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p2, labels=gt)) 195 | loss_p2_100 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p2_100, labels=gt)) 196 | loss_p2_075 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p2_075, labels=gt075)) 197 | loss_p2_050 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p2_050, labels=gt050)) 198 | 199 | loss_p3 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p3, labels=gt)) 200 | loss_p3_100 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p3_100, labels=gt)) 201 | loss_p3_075 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p3_075, labels=gt075)) 202 | loss_p3_050 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction_p3_050, labels=gt050)) 203 | 204 | loss_s1 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out1)), [1, 2, 3]))) 205 | loss_s1_100 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out1_100)), [1, 2, 3]))) 206 | loss_s1_075 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap075, pose_out1_075)), [1, 2, 3]))) 207 | loss_s1_050 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap050, pose_out1_050)), [1, 2, 3]))) 208 | 209 | loss_s2 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out2)), [1, 2, 3]))) 210 | loss_s2_100 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out2_100)), [1, 2, 3]))) 211 | loss_s2_075 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap075, pose_out2_075)), [1, 2, 3]))) 212 | loss_s2_050 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap050, pose_out2_050)), [1, 2, 3]))) 213 | 214 | loss_s3 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out3)), [1, 2, 3]))) 215 | loss_s3_100 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap, pose_out3_100)), [1, 2, 3]))) 216 | loss_s3_075 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap075, pose_out3_075)), [1, 2, 3]))) 217 | loss_s3_050 = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(next_heatmap050, pose_out3_050)), [1, 2, 3]))) 218 | 219 | loss_parsing = loss_p1 + loss_p1_100 + loss_p1_075 + loss_p1_050 + loss_p2 + loss_p2_100 + loss_p2_075 + loss_p2_050 + loss_p3 + loss_p3_100 + loss_p3_075 + loss_p3_050 220 | loss_pose = loss_s1 + loss_s1_100 + loss_s1_075 + loss_s1_050 + loss_s2 + loss_s2_100 + loss_s2_075 + loss_s2_050 + loss_s3 + loss_s3_100 + loss_s3_075 + loss_s3_050 221 | reduced_loss = loss_pose * s_Weight + 
loss_parsing * p_Weight 222 | 223 | trainable_variable = tf.trainable_variables() 224 | grads = optim.compute_gradients(reduced_loss, var_list=trainable_variable) 225 | 226 | tower_grads.append(grads) 227 | 228 | tf.add_to_collection('loss_p1', loss_p1) 229 | tf.add_to_collection('loss_p2', loss_p2) 230 | tf.add_to_collection('loss_p3', loss_p3) 231 | tf.add_to_collection('loss_s1', loss_s1) 232 | tf.add_to_collection('loss_s2', loss_s2) 233 | tf.add_to_collection('loss_s3', loss_s3) 234 | tf.add_to_collection('reduced_loss', reduced_loss) 235 | 236 | # Average the gradients 237 | grads_ave = average_gradients(tower_grads) 238 | # apply the gradients with our optimizers 239 | train_op = optim.apply_gradients(grads_ave) 240 | 241 | loss_p1_ave = tf.reduce_mean(tf.get_collection('loss_p1')) 242 | loss_p2_ave = tf.reduce_mean(tf.get_collection('loss_p2')) 243 | loss_p3_ave = tf.reduce_mean(tf.get_collection('loss_p3')) 244 | loss_s1_ave = tf.reduce_mean(tf.get_collection('loss_s1')) 245 | loss_s2_ave = tf.reduce_mean(tf.get_collection('loss_s2')) 246 | loss_s3_ave = tf.reduce_mean(tf.get_collection('loss_s3')) 247 | loss_ave = tf.reduce_mean(tf.get_collection('reduced_loss')) 248 | 249 | loss_summary_p1 = tf.summary.scalar("loss_p1_ave", loss_p1_ave) 250 | loss_summary_p2 = tf.summary.scalar("loss_p2_ave", loss_p2_ave) 251 | loss_summary_p3 = tf.summary.scalar("loss_p3_ave", loss_p3_ave) 252 | loss_summary_s1 = tf.summary.scalar("loss_s1_ave", loss_s1_ave) 253 | loss_summary_s2 = tf.summary.scalar("loss_s2_ave", loss_s2_ave) 254 | loss_summary_s3 = tf.summary.scalar("loss_s3_ave", loss_s3_ave) 255 | loss_summary_ave = tf.summary.scalar("loss_ave", loss_ave) 256 | loss_summary = tf.summary.merge([loss_summary_ave, loss_summary_s1, loss_summary_s2, loss_summary_s3, loss_summary_p1, loss_summary_p2, loss_summary_p3]) 257 | summary_writer = tf.summary.FileWriter(LOG_DIR, graph=tf.get_default_graph()) 258 | 259 | # Set up tf session and initialize variables. 260 | config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=False) 261 | config.gpu_options.allow_growth = True 262 | sess = tf.Session(config=config) 263 | init = tf.global_variables_initializer() 264 | sess.run(init) 265 | 266 | # Saver for storing checkpoints of the model. 267 | all_saver_var = tf.global_variables() 268 | restore_var = all_saver_var #[v for v in all_saver_var if 'pose' not in v.name and 'parsing' not in v.name] 269 | saver = tf.train.Saver(var_list=all_saver_var, max_to_keep=50) 270 | loader = tf.train.Saver(var_list=restore_var) 271 | 272 | if load(loader, sess, SNAPSHOT_DIR): 273 | print(" [*] Load SUCCESS") 274 | else: 275 | print(" [!] Load failed...") 276 | 277 | # Start queue threads. 278 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 279 | 280 | # Iterate over training steps. 281 | for step in range(NUM_STEPS): 282 | start_time = time.time() 283 | loss_value = 0 284 | feed_dict = { step_ph : step } 285 | 286 | # Apply gradients. 287 | summary, loss_value, _ = sess.run([loss_summary, reduced_loss, train_op], feed_dict=feed_dict) 288 | summary_writer.add_summary(summary, step) 289 | if step % SAVE_PRED_EVERY == 0: 290 | save(saver, sess, SNAPSHOT_DIR, step) 291 | 292 | duration = time.time() - start_time 293 | print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration)) 294 | coord.request_stop() 295 | coord.join(threads) 296 | 297 | def average_gradients(tower_grads): 298 | """Calculate the average gradient for each shared variable across all towers. 
299 | Note that this function provides a synchronization point across all towers. 300 | Args: 301 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 302 | is over individual gradients. The inner list is over the gradient 303 | calculation for each tower. 304 | Returns: 305 | List of pairs of (gradient, variable) where the gradient has been averaged 306 | across all towers. 307 | """ 308 | average_grads = [] 309 | for grad_and_vars in zip(*tower_grads): 310 | # Note that each grad_and_vars looks like the following: 311 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 312 | grads = [] 313 | for g, _ in grad_and_vars: 314 | # Add 0 dimension to the gradients to represent the tower. 315 | expanded_g = tf.expand_dims(g, 0) 316 | 317 | # Append on a 'tower' dimension which we will average over below. 318 | grads.append(expanded_g) 319 | 320 | # Average over the 'tower' dimension. 321 | grad = tf.concat(axis=0, values=grads) 322 | grad = tf.reduce_mean(grad, 0) 323 | 324 | # Keep in mind that the Variables are redundant because they are shared 325 | # across towers. So .. we will just return the first tower's pointer to 326 | # the Variable. 327 | v = grad_and_vars[0][1] 328 | grad_and_var = (grad, v) 329 | average_grads.append(grad_and_var) 330 | return average_grads 331 | 332 | if __name__ == '__main__': 333 | main() 334 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import JPPNetModel 2 | from .image_reader import ImageReader 3 | from .utils import decode_labels, inv_preprocess, prepare_label, save, load 4 | from .ops import conv2d, max_pool, linear 5 | from .lip_reader import LIPReader -------------------------------------------------------------------------------- /utils/image_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | IGNORE_LABEL = 255 7 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 8 | 9 | def image_scaling(img, label): 10 | """ 11 | Randomly scales the images between 0.5 to 1.5 times the original size. 12 | 13 | Args: 14 | img: Training image to scale. 15 | label: Segmentation mask to scale. 16 | """ 17 | 18 | scale = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None) 19 | h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale)) 20 | w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale)) 21 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1]) 22 | img = tf.image.resize_images(img, new_shape) 23 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 24 | label = tf.squeeze(label, squeeze_dims=[0]) 25 | 26 | return img, label 27 | 28 | def image_mirroring(img, label): 29 | """ 30 | Randomly mirrors the images. 31 | 32 | Args: 33 | img: Training image to mirror. 34 | label: Segmentation mask to mirror. 35 | """ 36 | 37 | distort_left_right_random = tf.random_uniform([1], 0, 1.0, dtype=tf.float32)[0] 38 | mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5) 39 | img = tf.reverse(img, mirror) 40 | reversed_label = tf.reverse(label, mirror) 41 | 42 | return img, reversed_label 43 | 44 | def random_crop_and_pad_image_and_labels(image, label, crop_h, crop_w, ignore_label=255): 45 | """ 46 | Randomly crop and pads the input images. 
47 | 48 | Args: 49 | image: Training image to crop/ pad. 50 | label: Segmentation mask to crop/ pad. 51 | crop_h: Height of cropped segment. 52 | crop_w: Width of cropped segment. 53 | ignore_label: Label to ignore during the training. 54 | """ 55 | 56 | label = tf.cast(label, dtype=tf.float32) 57 | label = label - ignore_label # Needs to be subtracted and later added due to 0 padding. 58 | combined = tf.concat([image, label], 2) 59 | image_shape = tf.shape(image) 60 | combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0, tf.maximum(crop_h, image_shape[0]), tf.maximum(crop_w, image_shape[1])) 61 | 62 | last_image_dim = tf.shape(image)[-1] 63 | last_label_dim = tf.shape(label)[-1] 64 | combined_crop = tf.random_crop(combined_pad, [crop_h,crop_w,4]) 65 | img_crop = combined_crop[:, :, :last_image_dim] 66 | label_crop = combined_crop[:, :, last_image_dim:] 67 | label_crop = label_crop + ignore_label 68 | label_crop = tf.cast(label_crop, dtype=tf.uint8) 69 | 70 | # Set static shape so that tensorflow knows shape at compile time. 71 | img_crop.set_shape((crop_h, crop_w, 3)) 72 | label_crop.set_shape((crop_h,crop_w, 1)) 73 | return img_crop, label_crop 74 | 75 | def read_labeled_image_list(data_dir, data_list): 76 | """Reads txt file containing paths to images and ground truth masks. 77 | 78 | Args: 79 | data_dir: path to the directory with images and masks. 80 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 81 | 82 | Returns: 83 | Two lists with all file names for images and masks, respectively. 84 | """ 85 | f = open(data_list, 'r') 86 | images = [] 87 | for line in f: 88 | try: 89 | image, mask = line.strip("\n").split(' ') 90 | except ValueError: # Adhoc for test. 91 | image = line.strip("\n") 92 | images.append(data_dir + image) 93 | return images 94 | 95 | def read_images_from_disk(input_queue, input_size, random_scale, random_mirror): # optional pre-processing arguments 96 | """Read one image and its corresponding mask with optional pre-processing. 97 | 98 | Args: 99 | input_queue: tf queue with paths to the image and its mask. 100 | input_size: a tuple with (height, width) values. 101 | If not given, return images of original size. 102 | random_scale: whether to randomly scale the images prior 103 | to random crop. 104 | random_mirror: whether to randomly mirror the images prior 105 | to random crop. 106 | 107 | Returns: 108 | Two tensors: the decoded image and its mask. 109 | """ 110 | 111 | img_contents = tf.read_file(input_queue[0]) 112 | 113 | img = tf.image.decode_jpeg(img_contents, channels=3) 114 | img_r, img_g, img_b = tf.split(value=img, num_or_size_splits=3, axis=2) 115 | img = tf.cast(tf.concat([img_b, img_g, img_r], 2), dtype=tf.float32) 116 | # Extract mean. 117 | img -= IMG_MEAN 118 | 119 | return img 120 | 121 | class ImageReader(object): 122 | '''Generic ImageReader which reads images and corresponding segmentation 123 | masks from the disk, and enqueues them into a TensorFlow queue. 124 | ''' 125 | 126 | def __init__(self, data_dir, data_list, input_size, random_scale, 127 | random_mirror, coord): 128 | '''Initialise an ImageReader. 129 | 130 | Args: 131 | data_dir: path to the directory with images and masks. 132 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 133 | input_size: a tuple with (height, width) values, to which all the images will be resized. 134 | random_scale: whether to randomly scale the images prior to random crop. 
135 | random_mirror: whether to randomly mirror the images prior to random crop. 136 | coord: TensorFlow queue coordinator. 137 | ''' 138 | self.data_dir = data_dir 139 | self.data_list = data_list 140 | self.input_size = input_size 141 | self.coord = coord 142 | 143 | self.image_list = read_labeled_image_list(self.data_dir, self.data_list) 144 | self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string) 145 | self.queue = tf.train.slice_input_producer([self.images], 146 | shuffle=input_size is not None) # not shuffling if it is val 147 | self.image = read_images_from_disk(self.queue, self.input_size, random_scale, random_mirror) 148 | 149 | def dequeue(self, num_elements): 150 | '''Pack images and labels into a batch. 151 | 152 | Args: 153 | num_elements: the batch size. 154 | 155 | Returns: 156 | Two tensors of size (batch_size, h, w, {3, 1}) for images and masks.''' 157 | image_batch, label_batch = tf.train.batch([self.image, self.label], 158 | num_elements) 159 | return image_batch, label_batch 160 | -------------------------------------------------------------------------------- /utils/lip_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import random 6 | 7 | IGNORE_LABEL = 255 8 | NUM_POSE = 16 9 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 10 | 11 | def image_scaling(img, label, heatmap): 12 | """ 13 | Randomly scales the images between 0.5 to 1.5 times the original size. 14 | 15 | Args: 16 | img: Training image to scale. 17 | label: Segmentation mask to scale. 18 | """ 19 | 20 | scale = tf.random_uniform([1], minval=0.75, maxval=1.25, dtype=tf.float32, seed=None) 21 | h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale)) 22 | w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale)) 23 | 24 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1]) 25 | img = tf.image.resize_images(img, new_shape) 26 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 27 | label = tf.squeeze(label, squeeze_dims=[0]) 28 | heatmap = tf.image.resize_nearest_neighbor(heatmap, new_shape) 29 | return img, label, heatmap 30 | 31 | def image_mirroring(img, label, label_rev, heatmap, heatmap_rev): 32 | """ 33 | Randomly mirrors the images. 34 | 35 | Args: 36 | img: Training image to mirror. 37 | label: Segmentation mask to mirror. 
38 | """ 39 | 40 | distort_left_right_random = tf.random_uniform([1], 0, 1.0, dtype=tf.float32)[0] 41 | mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5) 42 | mirror = tf.boolean_mask([0, 1, 2], mirror) 43 | img = tf.reverse(img, mirror) 44 | 45 | flag = tf.less(distort_left_right_random, 0.5) 46 | mask = tf.stack([tf.logical_not(flag), flag]) 47 | 48 | label_and_rev = tf.stack([label, label_rev]) 49 | label_ = tf.boolean_mask(label_and_rev, mask) 50 | label_ = tf.reshape(label_, tf.shape(label)) 51 | 52 | heatmap_and_rev = tf.stack([heatmap, heatmap_rev]) 53 | heatmap_ = tf.boolean_mask(heatmap_and_rev, mask) 54 | heatmap_ = tf.reshape(heatmap_, tf.shape(heatmap)) 55 | 56 | return img, label_, heatmap_ 57 | 58 | def random_resize_img_labels(image, label, heatmap, resized_h, resized_w): 59 | 60 | scale = tf.random_uniform([1], minval=0.75, maxval=1.25, dtype=tf.float32, seed=None) 61 | h_new = tf.to_int32(tf.multiply(tf.to_float(resized_h), scale)) 62 | w_new = tf.to_int32(tf.multiply(tf.to_float(resized_w), scale)) 63 | 64 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1]) 65 | img = tf.image.resize_images(image, new_shape) 66 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 67 | label = tf.squeeze(label, squeeze_dims=[0]) 68 | heatmap = tf.image.resize_nearest_neighbor(tf.expand_dims(heatmap, 0), new_shape) 69 | heatmap = tf.squeeze(heatmap, squeeze_dims=[0]) 70 | return img, label, heatmap 71 | 72 | def resize_img_labels(image, label, heatmap, resized_h, resized_w): 73 | 74 | new_shape = tf.stack([tf.to_int32(resized_h), tf.to_int32(resized_w)]) 75 | img = tf.image.resize_images(image, new_shape) 76 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 77 | label = tf.squeeze(label, squeeze_dims=[0]) 78 | new_shape = tf.stack([tf.to_int32(resized_h / 8.0), tf.to_int32(resized_w / 8.0)]) 79 | heatmap = tf.image.resize_nearest_neighbor(tf.expand_dims(heatmap, 0), new_shape) 80 | heatmap = tf.squeeze(heatmap, squeeze_dims=[0]) 81 | return img, label, heatmap 82 | 83 | def random_crop_and_pad_image_and_labels(image, label, heatmap, crop_h, crop_w, ignore_label=255): 84 | """ 85 | Randomly crop and pads the input images. 86 | 87 | Args: 88 | image: Training image to crop/ pad. 89 | label: Segmentation mask to crop/ pad. 90 | crop_h: Height of cropped segment. 91 | crop_w: Width of cropped segment. 92 | ignore_label: Label to ignore during the training. 93 | """ 94 | 95 | label = tf.cast(label, dtype=tf.float32) 96 | label = label - ignore_label # Needs to be subtracted and later added due to 0 padding. 97 | heatmap = tf.cast(heatmap, dtype=tf.float32) 98 | combined = tf.concat([image, label, heatmap], 2) 99 | image_shape = tf.shape(image) 100 | combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0, tf.maximum(crop_h, image_shape[0]), tf.maximum(crop_w, image_shape[1])) 101 | 102 | last_image_dim = tf.shape(image)[-1] 103 | last_label_dim = tf.shape(label)[-1] 104 | combined_crop = tf.random_crop(combined_pad, [crop_h,crop_w,4+NUM_POSE]) 105 | img_crop = combined_crop[:, :, :last_image_dim] 106 | label_crop = combined_crop[:, :, last_image_dim:last_image_dim+last_label_dim] 107 | heatmap_crop = combined_crop[:, :, last_image_dim+last_label_dim:] 108 | label_crop = label_crop + ignore_label 109 | label_crop = tf.cast(label_crop, dtype=tf.uint8) 110 | 111 | # Set static shape so that tensorflow knows shape at compile time. 
112 | img_crop.set_shape((crop_h, crop_w, 3)) 113 | label_crop.set_shape((crop_h,crop_w, 1)) 114 | heatmap_crop.set_shape((crop_h, crop_w, NUM_POSE)) 115 | new_shape = tf.stack([tf.to_int32(crop_h / 8.0), tf.to_int32(crop_w / 8.0)]) 116 | heatmap = tf.image.resize_nearest_neighbor(tf.expand_dims(heatmap_crop, 0), new_shape) 117 | heatmap = tf.squeeze(heatmap, squeeze_dims=[0]) 118 | return img_crop, label_crop, heatmap 119 | 120 | 121 | def read_labeled_image_list(data_dir, data_list): 122 | """Reads txt file containing paths to images and ground truth masks. 123 | 124 | Args: 125 | data_dir: path to the directory with images and masks. 126 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 127 | 128 | Returns: 129 | Two lists with all file names for images and masks, respectively. 130 | """ 131 | f = open(data_list, 'r') 132 | images = [] 133 | masks = [] 134 | masks_rev = [] 135 | for line in f: 136 | try: 137 | image, mask, mask_rev = line.strip("\n").split(' ') 138 | except ValueError: # Adhoc for test. 139 | image = mask = mask_rev = line.strip("\n") 140 | images.append(data_dir + image) 141 | masks.append(data_dir + mask) 142 | masks_rev.append(data_dir + mask_rev) 143 | return images, masks, masks_rev 144 | 145 | def read_pose_list(data_dir, data_id_list): 146 | f = open(data_id_list, 'r') 147 | poses = [] 148 | for line in f: 149 | pose = line.strip("\n") 150 | poses.append(data_dir + '/heatmap/' + pose) 151 | return poses 152 | 153 | 154 | def read_images_from_disk(input_queue, input_size, random_scale, random_mirror=False): # optional pre-processing arguments 155 | """Read one image and its corresponding mask with optional pre-processing. 156 | 157 | Args: 158 | input_queue: tf queue with paths to the image and its mask. 159 | input_size: a tuple with (height, width) values. 160 | If not given, return images of original size. 161 | random_scale: whether to randomly scale the images prior 162 | to random crop. 163 | random_mirror: whether to randomly mirror the images prior 164 | to random crop. 165 | 166 | Returns: 167 | Two tensors: the decoded image and its mask. 168 | """ 169 | 170 | img_contents = tf.read_file(input_queue[0]) 171 | label_contents = tf.read_file(input_queue[1]) 172 | label_contents_rev = tf.read_file(input_queue[2]) 173 | 174 | img = tf.image.decode_jpeg(img_contents, channels=3) 175 | img_r, img_g, img_b = tf.split(value=img, num_or_size_splits=3, axis=2) 176 | img = tf.cast(tf.concat([img_b, img_g, img_r], 2), dtype=tf.float32) 177 | # Extract mean. 
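# The channels were re-ordered to BGR above, so IMG_MEAN is subtracted as a per-channel
# BGR mean, matching the Caffe-converted backbone weights.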
178 | img -= IMG_MEAN 179 | 180 | label = tf.image.decode_png(label_contents, channels=1) 181 | label_rev = tf.image.decode_png(label_contents_rev, channels=1) 182 | 183 | pose_id = input_queue[3] 184 | pose = [] 185 | for i in xrange(NUM_POSE): 186 | pose_contents = tf.read_file(pose_id+'_{}.png'.format(i)) 187 | pose_i = tf.image.decode_png(pose_contents, channels=1) 188 | pose.append(pose_i) 189 | heatmap = tf.concat(pose, axis=2) 190 | 191 | # create reversed heatmap 192 | pose_rev = [None] * 16 193 | pose_rev[0] = pose[5] 194 | pose_rev[1] = pose[4] 195 | pose_rev[2] = pose[3] 196 | pose_rev[3] = pose[2] 197 | pose_rev[4] = pose[1] 198 | pose_rev[5] = pose[0] 199 | pose_rev[10] = pose[15] 200 | pose_rev[11] = pose[14] 201 | pose_rev[12] = pose[13] 202 | pose_rev[13] = pose[12] 203 | pose_rev[14] = pose[11] 204 | pose_rev[15] = pose[10] 205 | pose_rev[6] = pose[6] 206 | pose_rev[7] = pose[7] 207 | pose_rev[8] = pose[8] 208 | pose_rev[9] = pose[9] 209 | heatmap_rev = tf.concat(pose_rev, axis=2) 210 | heatmap_rev = tf.reverse(heatmap_rev, tf.stack([1])) 211 | 212 | if input_size is not None: 213 | h, w = input_size 214 | 215 | # Randomly mirror the images and labels. 216 | if random_mirror: 217 | img, label, heatmap = image_mirroring(img, label, label_rev, heatmap, heatmap_rev) 218 | 219 | # Randomly resize the images and labels. 220 | if random_scale: 221 | img, label, heatmap = random_resize_img_labels(img, label, heatmap, h, w) 222 | # Random scale must be followed by crop to create fixed size 223 | img, label, heatmap = random_crop_and_pad_image_and_labels(img, label, heatmap, h, w, IGNORE_LABEL) 224 | else: 225 | img, label, heatmap = resize_img_labels(img, label, heatmap, h, w) 226 | 227 | return img, label, heatmap 228 | 229 | class LIPReader(object): 230 | '''Generic ImageReader which reads images and corresponding segmentation 231 | masks from the disk, and enqueues them into a TensorFlow queue. 232 | ''' 233 | 234 | def __init__(self, data_dir, data_list, data_id_list, input_size, random_scale, 235 | random_mirror, shuffle, coord): 236 | '''Initialise an ImageReader. 237 | 238 | Args: 239 | data_dir: path to the directory with images and masks. 240 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 241 | data_id_list: path to the file of image id. 242 | input_size: a tuple with (height, width) values, to which all the images will be resized. 243 | random_scale: whether to randomly scale the images prior to random crop. 244 | random_mirror: whether to randomly mirror the images prior to random crop. 245 | coord: TensorFlow queue coordinator. 
246 | ''' 247 | self.data_dir = data_dir 248 | self.data_list = data_list 249 | self.data_id_list = data_id_list 250 | self.input_size = input_size 251 | self.coord = coord 252 | 253 | self.image_list, self.label_list, self.label_rev_list = read_labeled_image_list(self.data_dir, self.data_list) 254 | self.pose_list = read_pose_list(self.data_dir, self.data_id_list) 255 | self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string) 256 | self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string) 257 | self.labels_rev = tf.convert_to_tensor(self.label_rev_list, dtype=tf.string) 258 | self.poses = tf.convert_to_tensor(self.pose_list, dtype=tf.string) 259 | self.queue = tf.train.slice_input_producer([self.images, self.labels, self.labels_rev, self.poses], shuffle=shuffle) 260 | self.image, self.label, self.heatmap = read_images_from_disk(self.queue, self.input_size, random_scale, random_mirror) 261 | 262 | def dequeue(self, num_elements): 263 | '''Pack images and labels into a batch. 264 | 265 | Args: 266 | num_elements: the batch size. 267 | 268 | Returns: 269 | Two tensors of size (batch_size, h, w, {3, 1}) for images and masks.''' 270 | batch_list = [self.image, self.label, self.heatmap] 271 | image_batch, label_batch, heatmap_batch = tf.train.batch([self.image, self.label, self.heatmap], num_elements) 272 | return image_batch, label_batch, heatmap_batch 273 | -------------------------------------------------------------------------------- /utils/model.py: -------------------------------------------------------------------------------- 1 | # Converted to TensorFlow .caffemodel 2 | # with the DeepLab-ResNet configuration. 3 | # The batch normalisation layer is provided by 4 | # the slim library (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim). 5 | 6 | from kaffe.tensorflow import Network 7 | import tensorflow as tf 8 | 9 | class JPPNetModel(Network): 10 | def setup(self, is_training, n_classes): 11 | '''Network definition. 12 | 13 | Args: 14 | is_training: whether to update the running mean and variance of the batch normalisation layer. 15 | If the batch size is small, it is better to keep the running mean and variance of 16 | the-pretrained model frozen. 
17 | ''' 18 | (self.feed('data') 19 | .conv(7, 7, 64, 2, 2, biased=False, relu=False, name='conv1') 20 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn_conv1') 21 | .max_pool(3, 3, 2, 2, name='pool1') 22 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1') 23 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch1')) 24 | 25 | (self.feed('pool1') 26 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a') 27 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2a') 28 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b') 29 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2b') 30 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c') 31 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch2c')) 32 | 33 | (self.feed('bn2a_branch1', 34 | 'bn2a_branch2c') 35 | .add(name='res2a') 36 | .relu(name='res2a_relu') 37 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a') 38 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2a') 39 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b') 40 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2b') 41 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c') 42 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2b_branch2c')) 43 | 44 | (self.feed('res2a_relu', 45 | 'bn2b_branch2c') 46 | .add(name='res2b') 47 | .relu(name='res2b_relu') 48 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a') 49 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2a') 50 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b') 51 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2b') 52 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c') 53 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2c_branch2c')) 54 | 55 | (self.feed('res2b_relu', 56 | 'bn2c_branch2c') 57 | .add(name='res2c') 58 | .relu(name='res2c_relu') 59 | .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1') 60 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch1')) 61 | 62 | (self.feed('res2c_relu') 63 | .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a') 64 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2a') 65 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b') 66 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2b') 67 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c') 68 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch2c')) 69 | 70 | (self.feed('bn3a_branch1', 71 | 'bn3a_branch2c') 72 | .add(name='res3a') 73 | .relu(name='res3a_relu') 74 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2a') 75 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2a') 76 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2b') 77 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2b') 78 | .conv(1, 1, 512, 1, 
1, biased=False, relu=False, name='res3b1_branch2c') 79 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b1_branch2c')) 80 | 81 | (self.feed('res3a_relu', 82 | 'bn3b1_branch2c') 83 | .add(name='res3b1') 84 | .relu(name='res3b1_relu') 85 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2a') 86 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2a') 87 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2b') 88 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2b') 89 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b2_branch2c') 90 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b2_branch2c')) 91 | 92 | (self.feed('res3b1_relu', 93 | 'bn3b2_branch2c') 94 | .add(name='res3b2') 95 | .relu(name='res3b2_relu') 96 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2a') 97 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2a') 98 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2b') 99 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2b') 100 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b3_branch2c') 101 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b3_branch2c')) 102 | 103 | (self.feed('res3b2_relu', 104 | 'bn3b3_branch2c') 105 | .add(name='res3b3') 106 | .relu(name='res3b3_relu') 107 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch1') 108 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch1')) 109 | 110 | (self.feed('res3b3_relu') 111 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4a_branch2a') 112 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2a') 113 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4a_branch2b') 114 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2b') 115 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c') 116 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch2c')) 117 | 118 | (self.feed('bn4a_branch1', 119 | 'bn4a_branch2c') 120 | .add(name='res4a') 121 | .relu(name='res4a_relu') 122 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b1_branch2a') 123 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2a') 124 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b1_branch2b') 125 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2b') 126 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b1_branch2c') 127 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b1_branch2c')) 128 | 129 | (self.feed('res4a_relu', 130 | 'bn4b1_branch2c') 131 | .add(name='res4b1') 132 | .relu(name='res4b1_relu') 133 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b2_branch2a') 134 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2a') 135 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b2_branch2b') 136 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2b') 137 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, 
name='res4b2_branch2c') 138 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b2_branch2c')) 139 | 140 | (self.feed('res4b1_relu', 141 | 'bn4b2_branch2c') 142 | .add(name='res4b2') 143 | .relu(name='res4b2_relu') 144 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b3_branch2a') 145 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2a') 146 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b3_branch2b') 147 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2b') 148 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b3_branch2c') 149 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b3_branch2c')) 150 | 151 | (self.feed('res4b2_relu', 152 | 'bn4b3_branch2c') 153 | .add(name='res4b3') 154 | .relu(name='res4b3_relu') 155 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b4_branch2a') 156 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2a') 157 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b4_branch2b') 158 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2b') 159 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b4_branch2c') 160 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b4_branch2c')) 161 | 162 | (self.feed('res4b3_relu', 163 | 'bn4b4_branch2c') 164 | .add(name='res4b4') 165 | .relu(name='res4b4_relu') 166 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b5_branch2a') 167 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2a') 168 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b5_branch2b') 169 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2b') 170 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b5_branch2c') 171 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b5_branch2c')) 172 | 173 | (self.feed('res4b4_relu', 174 | 'bn4b5_branch2c') 175 | .add(name='res4b5') 176 | .relu(name='res4b5_relu') 177 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b6_branch2a') 178 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2a') 179 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b6_branch2b') 180 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2b') 181 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b6_branch2c') 182 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b6_branch2c')) 183 | 184 | (self.feed('res4b5_relu', 185 | 'bn4b6_branch2c') 186 | .add(name='res4b6') 187 | .relu(name='res4b6_relu') 188 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b7_branch2a') 189 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2a') 190 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b7_branch2b') 191 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2b') 192 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b7_branch2c') 193 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b7_branch2c')) 194 | 195 | (self.feed('res4b6_relu', 196 | 
'bn4b7_branch2c') 197 | .add(name='res4b7') 198 | .relu(name='res4b7_relu') 199 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b8_branch2a') 200 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2a') 201 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b8_branch2b') 202 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2b') 203 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b8_branch2c') 204 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b8_branch2c')) 205 | 206 | (self.feed('res4b7_relu', 207 | 'bn4b8_branch2c') 208 | .add(name='res4b8') 209 | .relu(name='res4b8_relu') 210 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b9_branch2a') 211 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2a') 212 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b9_branch2b') 213 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2b') 214 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b9_branch2c') 215 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b9_branch2c')) 216 | 217 | (self.feed('res4b8_relu', 218 | 'bn4b9_branch2c') 219 | .add(name='res4b9') 220 | .relu(name='res4b9_relu') 221 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b10_branch2a') 222 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2a') 223 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b10_branch2b') 224 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2b') 225 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b10_branch2c') 226 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b10_branch2c')) 227 | 228 | (self.feed('res4b9_relu', 229 | 'bn4b10_branch2c') 230 | .add(name='res4b10') 231 | .relu(name='res4b10_relu') 232 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b11_branch2a') 233 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2a') 234 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b11_branch2b') 235 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2b') 236 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b11_branch2c') 237 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b11_branch2c')) 238 | 239 | (self.feed('res4b10_relu', 240 | 'bn4b11_branch2c') 241 | .add(name='res4b11') 242 | .relu(name='res4b11_relu') 243 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b12_branch2a') 244 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2a') 245 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b12_branch2b') 246 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2b') 247 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b12_branch2c') 248 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b12_branch2c')) 249 | 250 | (self.feed('res4b11_relu', 251 | 'bn4b12_branch2c') 252 | .add(name='res4b12') 253 | .relu(name='res4b12_relu') 254 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, 
name='res4b13_branch2a') 255 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2a') 256 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b13_branch2b') 257 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2b') 258 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b13_branch2c') 259 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b13_branch2c')) 260 | 261 | (self.feed('res4b12_relu', 262 | 'bn4b13_branch2c') 263 | .add(name='res4b13') 264 | .relu(name='res4b13_relu') 265 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b14_branch2a') 266 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2a') 267 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b14_branch2b') 268 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2b') 269 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b14_branch2c') 270 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b14_branch2c')) 271 | 272 | (self.feed('res4b13_relu', 273 | 'bn4b14_branch2c') 274 | .add(name='res4b14') 275 | .relu(name='res4b14_relu') 276 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b15_branch2a') 277 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2a') 278 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b15_branch2b') 279 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2b') 280 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b15_branch2c') 281 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b15_branch2c')) 282 | 283 | (self.feed('res4b14_relu', 284 | 'bn4b15_branch2c') 285 | .add(name='res4b15') 286 | .relu(name='res4b15_relu') 287 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b16_branch2a') 288 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2a') 289 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b16_branch2b') 290 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2b') 291 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b16_branch2c') 292 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b16_branch2c')) 293 | 294 | (self.feed('res4b15_relu', 295 | 'bn4b16_branch2c') 296 | .add(name='res4b16') 297 | .relu(name='res4b16_relu') 298 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b17_branch2a') 299 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2a') 300 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b17_branch2b') 301 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2b') 302 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b17_branch2c') 303 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b17_branch2c')) 304 | 305 | (self.feed('res4b16_relu', 306 | 'bn4b17_branch2c') 307 | .add(name='res4b17') 308 | .relu(name='res4b17_relu') 309 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b18_branch2a') 310 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, 
name='bn4b18_branch2a') 311 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b18_branch2b') 312 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b18_branch2b') 313 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b18_branch2c') 314 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b18_branch2c')) 315 | 316 | (self.feed('res4b17_relu', 317 | 'bn4b18_branch2c') 318 | .add(name='res4b18') 319 | .relu(name='res4b18_relu') 320 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b19_branch2a') 321 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2a') 322 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b19_branch2b') 323 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2b') 324 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b19_branch2c') 325 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b19_branch2c')) 326 | 327 | (self.feed('res4b18_relu', 328 | 'bn4b19_branch2c') 329 | .add(name='res4b19') 330 | .relu(name='res4b19_relu') 331 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b20_branch2a') 332 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2a') 333 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b20_branch2b') 334 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2b') 335 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b20_branch2c') 336 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b20_branch2c')) 337 | 338 | (self.feed('res4b19_relu', 339 | 'bn4b20_branch2c') 340 | .add(name='res4b20') 341 | .relu(name='res4b20_relu') 342 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b21_branch2a') 343 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2a') 344 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b21_branch2b') 345 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2b') 346 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b21_branch2c') 347 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b21_branch2c')) 348 | 349 | (self.feed('res4b20_relu', 350 | 'bn4b21_branch2c') 351 | .add(name='res4b21') 352 | .relu(name='res4b21_relu') 353 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b22_branch2a') 354 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2a') 355 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b22_branch2b') 356 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2b') 357 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b22_branch2c') 358 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b22_branch2c')) 359 | 360 | (self.feed('res4b21_relu', 361 | 'bn4b22_branch2c') 362 | .add(name='res4b22') 363 | .relu(name='res4b22_relu')) 364 | 365 | ######################################parsing networks################################################################ 366 | (self.feed('res4b22_relu') 367 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch1') 368 | 
.batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch1')) 369 | 370 | (self.feed('res4b22_relu') 371 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5a_branch2a') 372 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2a') 373 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5a_branch2b') 374 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2b') 375 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c') 376 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch2c')) 377 | 378 | (self.feed('bn5a_branch1', 379 | 'bn5a_branch2c') 380 | .add(name='res5a') 381 | .relu(name='res5a_relu') 382 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a') 383 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2a') 384 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5b_branch2b') 385 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2b') 386 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c') 387 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5b_branch2c')) 388 | 389 | (self.feed('res5a_relu', 390 | 'bn5b_branch2c') 391 | .add(name='res5b') 392 | .relu(name='res5b_relu') 393 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a') 394 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5c_branch2a') 395 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5c_branch2b') 396 | .batch_normalization(activation_fn=tf.nn.relu, name='bn5c_branch2b', is_training=is_training) 397 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c') 398 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5c_branch2c')) 399 | 400 | (self.feed('res5b_relu', 401 | 'bn5c_branch2c') 402 | .add(name='res5c') 403 | .relu(name='res5c_relu') 404 | .atrous_conv(3, 3, n_classes, 6, padding='SAME', relu=False, name='fc1_human_c0')) 405 | 406 | (self.feed('res5c_relu') 407 | .atrous_conv(3, 3, n_classes, 12, padding='SAME', relu=False, name='fc1_human_c1')) 408 | 409 | (self.feed('res5c_relu') 410 | .atrous_conv(3, 3, n_classes, 18, padding='SAME', relu=False, name='fc1_human_c2')) 411 | 412 | (self.feed('res5c_relu') 413 | .atrous_conv(3, 3, n_classes, 24, padding='SAME', relu=False, name='fc1_human_c3')) 414 | 415 | (self.feed('fc1_human_c0', 416 | 'fc1_human_c1', 417 | 'fc1_human_c2', 418 | 'fc1_human_c3') 419 | .add(name='fc1_human')) 420 | 421 | (self.feed('res5c_relu') 422 | .conv(3, 3, 512, 1, 1, biased=True, relu=True, name='res5d_branch2a_parsing') 423 | .conv(3, 3, 256, 1, 1, biased=True, relu=True, name='res5d_branch2b_parsing')) 424 | 425 | # ###################################End################################################################ 426 | 427 | -------------------------------------------------------------------------------- /utils/ops.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from tensorflow.python.framework import ops 6 | 7 | def conv2d(input_, output, kernel, stride, relu, bn, name, stddev=0.01): 8 | with tf.variable_scope(name) as scope: 9 | # Convolution for a given input and kernel 10 | shape = 
[kernel, kernel, input_.get_shape()[-1], output] 11 | w = tf.get_variable('w', shape, initializer=tf.truncated_normal_initializer(stddev=stddev)) 12 | conv = tf.nn.conv2d(input_, w, strides=[1, stride, stride, 1], padding='SAME') 13 | # Add the biases 14 | b = tf.get_variable('b', [output], initializer=tf.constant_initializer(0.0)) 15 | conv = tf.nn.bias_add(conv, b) 16 | if bn: 17 | conv = tf.layers.batch_normalization(conv) 18 | # ReLU non-linearity 19 | if relu: 20 | conv = tf.nn.relu(conv, name=scope.name) 21 | return conv 22 | 23 | def max_pool(input_, kernel, stride, name): 24 | return tf.nn.max_pool(input_, ksize=[1, kernel, kernel, 1], strides=[1, stride, stride, 1], padding='SAME', name=name) 25 | 26 | def linear(input_, output, name, stddev=0.02, bias_start=0.0): 27 | shape = input_.get_shape().as_list() 28 | with tf.variable_scope(name) as scope: 29 | matrix = tf.get_variable("Matrix", [shape[1], output], tf.float32, 30 | tf.random_normal_initializer(stddev=stddev)) 31 | bias = tf.get_variable("bias", [output], initializer=tf.constant_initializer(bias_start)) 32 | return tf.matmul(input_, matrix) + bias 33 | 34 | def atrous_conv2d(input_, output, kernel, rate, relu, name, stddev=0.01): 35 | with tf.variable_scope(name) as scope: 36 | # Dilation convolution for a given input and kernel 37 | shape = [kernel, kernel, input_.get_shape()[-1], output] 38 | w = tf.get_variable('w', shape, initializer=tf.truncated_normal_initializer(stddev=stddev)) 39 | conv = tf.nn.atrous_conv2d(input_, w, rate, padding='SAME') 40 | # Add the biases 41 | b = tf.get_variable('b', [output], initializer=tf.constant_initializer(0.0)) 42 | conv = tf.nn.bias_add(conv, b) 43 | # ReLU non-linearity 44 | if relu: 45 | conv = tf.nn.relu(conv, name=scope.name) 46 | return conv 47 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import tensorflow as tf 4 | import os 5 | import scipy.misc 6 | from scipy.stats import multivariate_normal 7 | import matplotlib.pyplot as plt 8 | 9 | n_classes = 20 10 | # colour map 11 | label_colours = [(0,0,0) 12 | # 0=Background 13 | ,(128,0,0),(255,0,0),(0,85,0),(170,0,51),(255,85,0) 14 | # 1=Hat, 2=Hair, 3=Glove, 4=Sunglasses, 5=UpperClothes 15 | ,(0,0,85),(0,119,221),(85,85,0),(0,85,85),(85,51,0) 16 | # 6=Dress, 7=Coat, 8=Socks, 9=Pants, 10=Jumpsuits 17 | ,(52,86,128),(0,128,0),(0,0,255),(51,170,221),(0,255,255) 18 | # 11=Scarf, 12=Skirt, 13=Face, 14=LeftArm, 15=RightArm 19 | ,(85,255,170),(170,255,85),(255,255,0),(255,170,0)] 20 | # 16=LeftLeg, 17=RightLeg, 18=LeftShoe, 19=RightShoe 21 | # image mean 22 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 23 | 24 | def decode_labels(mask, num_images=1, num_classes=20): 25 | """Decode batch of segmentation masks. 26 | 27 | Args: 28 | mask: result of inference after taking argmax. 29 | num_images: number of images to decode from the batch. 30 | 31 | Returns: 32 | A batch with num_images RGB images of the same size as the input. 33 | """ 34 | n, h, w, c = mask.shape 35 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' 
% (n, num_images) 36 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) 37 | for i in range(num_images): 38 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i]))) 39 | pixels = img.load() 40 | for j_, j in enumerate(mask[i, :, :, 0]): 41 | for k_, k in enumerate(j): 42 | if k < n_classes: 43 | pixels[k_,j_] = label_colours[k] 44 | outputs[i] = np.array(img) 45 | return outputs 46 | 47 | def prepare_label(input_batch, new_size, one_hot=True): 48 | """Resize masks and perform one-hot encoding. 49 | 50 | Args: 51 | input_batch: input tensor of shape [batch_size H W 1]. 52 | new_size: a tensor with new height and width. 53 | 54 | Returns: 55 | Outputs a tensor of shape [batch_size h w 21] 56 | with last dimension comprised of 0's and 1's only. 57 | """ 58 | with tf.name_scope('label_encode'): 59 | input_batch = tf.image.resize_nearest_neighbor(input_batch, new_size) # as labels are integer numbers, need to use NN interp. 60 | input_batch = tf.squeeze(input_batch, squeeze_dims=[3]) # reducing the channel dimension. 61 | if one_hot: 62 | input_batch = tf.one_hot(input_batch, depth=n_classes) 63 | return input_batch 64 | 65 | def inv_preprocess(imgs, num_images): 66 | """Inverse preprocessing of the batch of images. 67 | Add the mean vector and convert from BGR to RGB. 68 | 69 | Args: 70 | imgs: batch of input images. 71 | num_images: number of images to apply the inverse transformations on. 72 | 73 | Returns: 74 | The batch of the size num_images with the same spatial dimensions as the input. 75 | """ 76 | n, h, w, c = imgs.shape 77 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images) 78 | outputs = np.zeros((num_images, h, w, c), dtype=np.uint8) 79 | for i in range(num_images): 80 | outputs[i] = (imgs[i] + IMG_MEAN)[:, :, ::-1].astype(np.uint8) 81 | return outputs 82 | 83 | 84 | def save(saver, sess, logdir, step): 85 | '''Save weights. 86 | Args: 87 | saver: TensorFlow Saver object. 88 | sess: TensorFlow session. 89 | logdir: path to the snapshots directory. 90 | step: current training step. 91 | ''' 92 | if not os.path.exists(logdir): 93 | os.makedirs(logdir) 94 | model_name = 'model.ckpt' 95 | checkpoint_path = os.path.join(logdir, model_name) 96 | 97 | if not os.path.exists(logdir): 98 | os.makedirs(logdir) 99 | saver.save(sess, checkpoint_path, global_step=step) 100 | print('The checkpoint has been created.') 101 | 102 | def load(saver, sess, ckpt_path): 103 | '''Load trained weights. 104 | 105 | Args: 106 | saver: TensorFlow saver object. 107 | sess: TensorFlow session. 108 | ckpt_path: path to checkpoint file with parameters. 109 | ''' 110 | ckpt = tf.train.get_checkpoint_state(ckpt_path) 111 | if ckpt and ckpt.model_checkpoint_path: 112 | ckpt_name = os.path.basename(ckpt.model_checkpoint_path) 113 | saver.restore(sess, os.path.join(ckpt_path, ckpt_name)) 114 | print("Restored model parameters from {}".format(ckpt_name)) 115 | return True 116 | else: 117 | return False 118 | --------------------------------------------------------------------------------
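--------------------------------------------------------------------------------

Editor's note: the input-pipeline utilities above are easiest to follow end to end. Below is a minimal, hypothetical usage sketch (not part of the repository): it wires the LIPReader class shown above (assumed to live in utils/lip_reader.py) into a TF 1.x queue-runner pipeline and colour-codes one ground-truth parsing mask with decode_labels from utils/utils.py. The dataset paths, input size, and batch size are assumptions and must be adapted to the local LIP dataset layout.

import tensorflow as tf

from utils.lip_reader import LIPReader   # assumed module path for the LIPReader class above
from utils.utils import decode_labels

INPUT_SIZE = (384, 384)   # assumed training resolution
BATCH_SIZE = 2            # assumed batch size

# Coordinator that stops the queue-runner threads started below.
coord = tf.train.Coordinator()

with tf.name_scope('create_inputs'):
    reader = LIPReader(
        data_dir='./datasets/lip',                        # assumed dataset root
        data_list='./datasets/lip/list/train_rev.txt',    # lines: image mask mask_rev
        data_id_list='./datasets/lip/list/train_id.txt',  # image ids used to locate heatmaps
        input_size=INPUT_SIZE,
        random_scale=True,
        random_mirror=True,
        shuffle=True,
        coord=coord)
    image_batch, label_batch, heatmap_batch = reader.dequeue(BATCH_SIZE)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # One batch of mean-subtracted BGR images, parsing masks and pose heatmaps.
    imgs, labels, heatmaps = sess.run([image_batch, label_batch, heatmap_batch])
    print(imgs.shape, labels.shape, heatmaps.shape)

    # Colour-code the first parsing mask for visual inspection.
    coloured = decode_labels(labels, num_images=1)

    coord.request_stop()
    coord.join(threads)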
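In the same spirit, the four fc1_human_c* branches in utils/model.py follow a multi-rate atrous pattern: parallel dilated convolutions over the same feature map whose per-pixel logits are summed. The short sketch below rebuilds that pattern with the atrous_conv2d helper from utils/ops.py; the input feature shape and the scope names are assumptions for illustration only.

import tensorflow as tf

from utils.ops import atrous_conv2d

n_classes = 20
# Assumed backbone feature map: batch of 48x48 maps with 256 channels.
features = tf.placeholder(tf.float32, [None, 48, 48, 256], name='features')

# One 3x3 dilated convolution per sampling rate, each in its own variable scope.
branches = [atrous_conv2d(features, n_classes, 3, rate=r, relu=False,
                          name='aspp_rate{}'.format(r))
            for r in (6, 12, 18, 24)]

# Per-pixel class scores are the element-wise sum of the four branches,
# mirroring the fc1_human add in utils/model.py.
logits = tf.add_n(branches, name='aspp_sum')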