├── README.md ├── __init__.py ├── demo.py ├── do_train.sh ├── doc └── train.md ├── prepare_datasets.sh ├── requirements.txt └── src_ortho ├── RunModel.py ├── RunModel.pyc ├── __init__.py ├── __init__.pyc ├── benchmark ├── __init__.py ├── __init__.pyc ├── eval_util.py ├── eval_util.pyc ├── evaluate_h36m.py ├── evaluate_h36m_multi.py ├── evaluate_mpi3dhp_test.py ├── evaluate_mpi3dhp_val.py └── evaluate_synthetic.py ├── config.py ├── config.pyc ├── data_loader.py ├── data_loader.pyc ├── datasets ├── __init__.py ├── __init__.pyc ├── coco_to_tfrecords.py ├── common.py ├── common.pyc ├── convert_datasets.sh ├── lsp_to_tfrecords.py ├── mpi_inf_3dhp │ ├── __init__.py │ ├── __init__.pyc │ ├── read_mpi_inf_3dhp.py │ └── read_mpi_inf_3dhp.pyc ├── mpi_inf_3dhp_test_to_tfrecords.py ├── mpi_inf_3dhp_to_tfrecords.py ├── mpii_to_tfrecords.py ├── pycocotools │ ├── __init__.py │ ├── __init__.pyc │ ├── _mask.c │ ├── _mask.pyx │ ├── _mask.so │ ├── coco.py │ ├── coco.pyc │ ├── cocoeval.py │ ├── mask.py │ └── mask.pyc ├── smpl_to_tfrecords.py └── synthetic_to_tfrecords.py ├── main.py ├── models.py ├── models.pyc ├── ops.py ├── ops.pyc ├── tf_smpl ├── __init__.py ├── __init__.pyc ├── batch_lbs.py ├── batch_lbs.pyc ├── batch_smpl.py ├── batch_smpl.pyc ├── projection.py ├── projection.pyc └── smpl_faces.npy ├── trainer.py ├── trainer.pyc └── util ├── __init__.py ├── __init__.pyc ├── data_utils.py ├── data_utils.pyc ├── image.py ├── image.pyc ├── openpose.py ├── openpose.pyc ├── renderer.py └── renderer.pyc /README.md: -------------------------------------------------------------------------------- 1 | # Shape-Aware Human Pose and Shape Reconstruction Using Multi-View Images 2 | 3 | Junbang Liang, Ming C. Lin 4 | ICCV 2019 5 | 6 | [Project Page](https://gamma.umd.edu/researchdirections/virtualtryon/humanmultiview) 7 | 8 | ### Requirements 9 | - Python 2.7 10 | - [TensorFlow](https://www.tensorflow.org/) tested on version 1.3 11 | 12 | ### Demo 13 | 14 | 1. Download the [pre-trained models](https://drive.google.com/file/d/1grEX6HmqL6CKittCyl_N6nggqIRIEOCt/view?usp=sharing) 15 | 16 | 2. Run the demo 17 | ``` 18 | python -m demo --img_paths ${your_image_paths_separated_by_commas} 19 | ``` 20 | 21 | Images should be cropped so that the height of the person is roughly 2/3 of the image height. Please check demo.py for more details. 22 | 23 | ### Training and Data 24 | 25 | Please see doc/train.md. 26 | 27 | ### Citation 28 | If you use this code for your research, please consider citing: 29 | ``` 30 | @inProceedings{liang2019shape, 31 | title={Shape-Aware Human Pose and Shape Reconstruction Using Multi-View Images}, 32 | author = {Junbang Liang 33 | and Ming C. Lin}, 34 | booktitle={International Conference on Computer Vision (ICCV)}, 35 | year={2019} 36 | } 37 | ``` 38 | 39 | ### Acknowledgement 40 | This project is derived from [HMR](https://github.com/akanazawa/hmr). If you have any question, feel free to refer to the original help doc or email liangjb@cs.umd.edu. This work is supported by National Science Foundation and Elizabeth S. Iribe Professorship. 
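As a rough companion to the cropping note in the Demo section, here is a minimal sketch (not part of this repository) of one way to crop an image so the person fills about 2/3 of its height; the person's vertical extent `y0`/`y1` is assumed to be known, e.g. from a detector or manual annotation:
```
# Minimal sketch, assuming (y0, y1) bound the person vertically in pixels.
import skimage.io as io

def crop_person_to_two_thirds(img_path, y0, y1, out_path, ratio=2.0 / 3.0):
    img = io.imread(img_path)
    target_h = int(round((y1 - y0) / ratio))   # window height so the person fills ~ratio of it
    cy = (y0 + y1) // 2                        # vertical center of the person
    top = max(0, min(cy - target_h // 2, img.shape[0] - target_h))
    io.imsave(out_path, img[top:top + target_h])
```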
41 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/__init__.py -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | Demo of HumanMultiView. 3 | 4 | Note that HumanMultiView requires the bounding box of the person in the image. The best performance is obtained when max length of the person in the image is roughly 150px. 5 | 6 | When only the image path is supplied, it assumes that the image is centered on a person whose length is roughly 150px. 7 | Alternatively, you can supply output of the openpose to figure out the bbox and the right scale factor. 8 | 9 | Sample usage: 10 | 11 | # On images on a tightly cropped image around the person 12 | python -m demo --img_paths data/im1963.jpg 13 | python -m demo --img_paths data/coco1.png 14 | 15 | # On images, with openpose output 16 | python -m demo --img_paths data/random.jpg --json_path data/random_keypoints.json 17 | """ 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import sys 23 | from absl import flags 24 | import numpy as np 25 | 26 | import skimage.io as io 27 | import tensorflow as tf 28 | 29 | #src_ortho 30 | from src_ortho.util import renderer as vis_util 31 | from src_ortho.util import image as img_util 32 | from src_ortho.util import openpose as op_util 33 | import src_ortho.config 34 | from src_ortho.RunModel import RunModel 35 | import os 36 | from src_ortho.tf_smpl.batch_smpl import SMPL 37 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 38 | 39 | flags.DEFINE_string('img_paths', 'data/im1963.jpg', 'Images to run, can be multi-view, separated by comma') 40 | flags.DEFINE_string( 41 | 'json_path', None, 42 | 'If specified, uses the openpose output to crop the image.') 43 | flags.DEFINE_integer('scale_size',224,'Scale size. 
Image will be scaled to this size and cropped at the center with 224x224 size.') 44 | 45 | 46 | def measure(theta, oriverts): 47 | """ 48 | theta: 85: 3+72+10 49 | verts:6890*3 50 | """ 51 | SMPL_model = SMPL(config.smpl_model_path) 52 | tens_shape = tf.placeholder(tf.float32, shape=[1, 10]) 53 | tens_pose = tf.zeros([1, 72]) 54 | verts, res_joint, _ = SMPL_model(tens_shape, tens_pose, get_skin=True) 55 | sess = tf.Session(config=tf.ConfigProto( 56 | device_count = {'GPU': 0} 57 | )) 58 | sess.run(tf.global_variables_initializer()) 59 | sess.run(tf.local_variables_initializer()) 60 | vts, gt3ds = sess.run([verts, res_joint], feed_dict={tens_shape: np.expand_dims(theta[-10:],0)}) 61 | vts=vts[0] 62 | gt3ds=gt3ds[0] 63 | off = -3 64 | neck=[3799,337,155,300,216,426,3772,3721,3665] 65 | #neck=[3060,454,217,219,153,154,301,209,210,215,213,259, 66 | # 427,3167,3922,3771,3728,3727,3722,3724,3812,3666,3668,3730,3729,3945] 67 | arm=[5893,5111,4727] 68 | leg=[4313,4496,6825] 69 | chest=[6501,1257,690,3044,615,1425,740,2909, 70 | 894,753,4243,4739,4252,4134,4686,4102,6491,4828] 71 | waist=[3507,1348,665,633,805,893,2917,6375,4376,4291,4147,4154,4813] 72 | hip=[3513,1209,1514,1457,3088,3140,3120,6544,6562,6512,4923,4986,4403] 73 | height=[3765,6859] 74 | node = lambda x, arr: arr[x+off] 75 | leng = lambda a, i, arr : np.sum((node(a[i], arr)-node(a[(i+1)%len(a)], arr))**2)**0.5 76 | total = lambda a, arr: np.sum([leng(a,i,arr) for i in range(len(a))]) 77 | print('height:{}'.format(leng(height,0,vts))) 78 | print('neck:{}'.format(total(neck,vts))) 79 | print('arm:{}'.format(leng(arm,0,vts)+leng(arm,1,vts))) 80 | print('leg:{}'.format(leng(leg,0,vts)+leng(leg,1,vts))) 81 | print('chest:{}'.format(total(chest,vts))) 82 | print('waist:{}'.format(total(waist,vts))) 83 | print('hip:{}'.format(total(hip,vts))) 84 | f = open('/nfshomes/liangjb/Downloads/show.obj', 'w') 85 | for i in range(vts.shape[0]): 86 | f.write('v ') 87 | for j in range(vts.shape[1]): 88 | f.write('{} '.format(vts[i,j])) 89 | f.write('\n') 90 | 91 | 92 | def visualize(input_imgs, imgs, proc_params, jointss, vertss, cams, view): 93 | """ 94 | Renders the result in original image coordinate frame. 
95 | """ 96 | img = imgs[view] 97 | proc_param = proc_params[view] 98 | joints = jointss[view] 99 | verts = vertss[view][0] 100 | cam = cams[view][0] 101 | 102 | cam_for_render, vert_shifted, joints_orig = vis_util.get_original( 103 | proc_param, verts, cam, joints, img_size=img.shape[:2]) 104 | 105 | # Render results 106 | skel_img = vis_util.draw_skeleton(img, joints_orig) 107 | rend_img_overlay = renderer( 108 | vert_shifted, cam=cam_for_render, img=img) 109 | rend_img_overlay = vis_util.draw_skeleton(rend_img_overlay, joints_orig) 110 | rend_img = renderer( 111 | vert_shifted, cam=cam_for_render, img_size=img.shape[:2]) 112 | rend_img_vp1 = renderer.rotated( 113 | vert_shifted, 90, cam=cam_for_render, img_size=img.shape[:2]) 114 | rend_img_vp2 = renderer.rotated( 115 | vert_shifted, -90, cam=cam_for_render, img_size=img.shape[:2]) 116 | 117 | import matplotlib.pyplot as plt 118 | # plt.ion() 119 | plt.figure(1, figsize=(10, 10)) 120 | plt.clf() 121 | plt.subplot(231) 122 | plt.imshow(img) 123 | plt.title('input') 124 | plt.axis('off') 125 | plt.subplot(232) 126 | plt.imshow(input_imgs[0][view]/2+0.5)#skel_img)# 127 | plt.title('joint projection') 128 | plt.axis('off') 129 | plt.subplot(233) 130 | plt.imshow(rend_img_overlay) 131 | plt.title('3D Mesh overlay') 132 | plt.axis('off') 133 | plt.subplot(234) 134 | plt.imshow(rend_img) 135 | plt.title('3D mesh') 136 | plt.axis('off') 137 | plt.subplot(235) 138 | plt.imshow(rend_img_vp1) 139 | plt.title('diff vp') 140 | plt.axis('off') 141 | plt.subplot(236) 142 | plt.imshow(rend_img_vp2) 143 | plt.title('diff vp') 144 | plt.axis('off') 145 | plt.draw() 146 | plt.show() 147 | plt.savefig('res.png') 148 | # io.imsave('ori{}.png'.format(view),img) 149 | io.imsave('ours{}.png'.format(view), rend_img_overlay)#rend_img[:,:,:3])# 150 | # import ipdb 151 | # ipdb.set_trace() 152 | # for i in range(126): 153 | # k = 360.0/125.0*(i-4) 154 | # rend_img_demo = renderer.rotated(vert_shifted, k, cam=cam_for_render, img_size=img.shape[:2]) 155 | # io.imsave('ours%03d.jpg'%(i), rend_img_demo[:,:,:3]) 156 | fcs=[] 157 | with open("/nfshomes/liangjb/Downloads/faces.obj", 'r') as f: 158 | for i,lines in enumerate(f): 159 | fcs.append(lines) 160 | with open("/nfshomes/liangjb/Downloads/show.obj", 'w') as f: 161 | f.write("# OBJ file\n") 162 | for v in range(verts.shape[0]): 163 | f.write("v %.4f %.4f %.4f\n" % (verts[v,0],verts[v,1],verts[v,2])) 164 | for lines in fcs: 165 | f.write("{}".format(lines)) 166 | 167 | 168 | def preprocess_image(img_path, json_path=None, view=0): 169 | img = io.imread(img_path) 170 | if img.shape[2] == 4: 171 | img = img[:, :, :3] 172 | 173 | size=config.scale_size 174 | center = np.round(np.array(img.shape[:2]) / 2).astype(int) 175 | # image center in (x,y) 176 | center = center[::-1] 177 | ori, _ = img_util.scale_and_crop(img, 1, center, 178 | config.img_size/size*np.max(img.shape[:2])*1) 179 | img = ori 180 | if json_path is None: 181 | if np.max(img.shape[:2]) != config.img_size: 182 | print('Resizing so the max image size is %d..' % config.img_size) 183 | # scale = (size / np.max(img.shape[:2])) 184 | scale = (config.img_size / np.max(img.shape[:2])) 185 | else: 186 | scale = 1. 
187 | center = np.round(np.array(img.shape[:2]) / 2).astype(int) 188 | # image center in (x,y) 189 | center = center[::-1] 190 | else: 191 | scale, center = op_util.get_bbox(json_path) 192 | 193 | crop, proc_param = img_util.scale_and_crop(img, scale, center, 194 | config.img_size) 195 | # ori = np.maximum(ori.astype(int) - 25,0) 196 | # st=int(80./224*ori.shape[0]) 197 | # en=int(144./224*ori.shape[1]) 198 | # ori[st:en,st:en]=0 199 | io.imsave('ori{}.png'.format(view),ori) 200 | 201 | # Normalize image to [-1, 1] 202 | crop = 2 * ((crop / 255.) - 0.5) 203 | 204 | # crop = crop - 0.2 205 | # if view==0: 206 | # crop[80:144,80:144]=-1 207 | 208 | return crop, proc_param, img 209 | 210 | def clip(x): 211 | mini = 0.01 212 | maxi = 1 213 | return np.clip(x*(x>0),mini,maxi)*(x>0) + np.clip(x*(x<0),-maxi,-mini)*(x<0) 214 | 215 | 216 | def main(img_paths, json_path=None): 217 | sess = tf.Session() 218 | paths = img_paths.split(',') 219 | num_views = len(paths) 220 | model = RunModel(config, 4, num_views, sess=sess) 221 | input_imgs, proc_params, imgs = [],[],[] 222 | 223 | for i,path in enumerate(paths): 224 | input_img, proc_param, img = preprocess_image(path, json_path, i) 225 | input_imgs.append(input_img) 226 | proc_params.append(proc_param) 227 | imgs.append(img) 228 | # Add batch dimension: 1 x D x D x 3 229 | # return 230 | input_imgs = np.expand_dims(np.array(input_imgs), 0) 231 | 232 | joints, verts, cams, joints3d, theta = model.predict( 233 | input_imgs, get_theta=True) 234 | measure(theta[0][0], verts[0][0]) # view, batch 235 | np.set_printoptions(precision=5,suppress=True) 236 | # print(theta[0][0][3:75].reshape((24,3))) 237 | # print(theta[0][0][-10:]) 238 | # print(joints3d[0]) 239 | # verts = clip((verts - joints3d[0][0,5,:]) / 100) + joints3d[0][0,5,:] 240 | 241 | for i in range(num_views): 242 | visualize(input_imgs, imgs, proc_params, joints, verts, cams, i) 243 | 244 | 245 | if __name__ == '__main__': 246 | config = flags.FLAGS 247 | config(sys.argv) 248 | # Using pre-trained model, change this to use your own. 249 | # config.load_path = src.config.PRETRAINED_MODEL 250 | 251 | config.batch_size = 1 252 | 253 | renderer = vis_util.SMPLRenderer(face_path=config.smpl_face_path) 254 | 255 | main(config.img_paths, config.json_path) 256 | -------------------------------------------------------------------------------- /do_train.sh: -------------------------------------------------------------------------------- 1 | CMD="python -m src_ortho.main --encoder_only=True --e_lr 1e-5 --log_img_step 100000 --e_loss_weight 60. --batch_size=32 --use_3d_label True --e_3d_weight 60. --e_pose_weight 60. --e_shape_weight 60. --epoch 20" 2 | 3 | echo $CMD 4 | $CMD 5 | -------------------------------------------------------------------------------- /doc/train.md: -------------------------------------------------------------------------------- 1 | ## Pre-reqs 2 | 3 | ### Download required models 4 | 5 | Download the SMPL model and pre-trained [params](https://drive.google.com/file/d/1grEX6HmqL6CKittCyl_N6nggqIRIEOCt/view?usp=sharing). 6 | 7 | Store this as `HumanMultiView/models/`. 8 | 9 | ### Download datasets. 10 | Download these datasets somewhere. 11 | 12 | - [LSP](http://sam.johnson.io/research/lsp_dataset.zip) and [LSP extended](http://sam.johnson.io/research/lspet_dataset.zip) 13 | - [COCO](http://cocodataset.org/#download) we used 2014 Train. You also need to 14 | install the [COCO API](https://github.com/cocodataset/cocoapi) for python. 
15 | - [MPII](http://human-pose.mpi-inf.mpg.de/#download) 16 | - [MPI-INF-3DHP](http://human-pose.mpi-inf.mpg.de/#download) 17 | - [Our synthetic dataset](https://drive.google.com/file/d/1nQEPCVY7VOXV-KOxeCX7hIQ9I4LLumWm/view?usp=sharing) 18 | 19 | If you use the datasets above, please consider citing their original papers. 20 | 21 | ## Training and Evaluation 22 | 23 | Our training and evaluation scripts are similar to HMR's. Please refer to the original [HMR help doc](https://github.com/akanazawa/hmr/blob/master/doc/train.md) for details. Note that you can specify the multi-view dataset IDs in src_ortho/data_loader.py. 24 | -------------------------------------------------------------------------------- /prepare_datasets.sh: -------------------------------------------------------------------------------- 1 | # --------------------------- 2 | # ----- SET YOUR PATH!! ----- 3 | # --------------------------- 4 | # This is the directory that contains README.txt 5 | LSP_DIR=/scratch1/hmr_multiview/data/lsp_dataset 6 | 7 | # This is the directory that contains README.txt 8 | LSP_EXT_DIR=/scratch1/hmr_multiview/data/lsp_extended 9 | 10 | # This is the directory that contains 'images' and 'annotations' 11 | MPII_DIR=/scratch1/hmr_multiview/data/mpii 12 | 13 | # This is where you want all of your tf_records to be saved: 14 | DATA_DIR=/scratch1/hmr_multiview/tf_datasets/ 15 | 16 | # This is the directory that contains README.txt, S1..S8, etc 17 | MPI_INF_3DHP_DIR=/scratch1/mpi_inf_3dhp/data/ 18 | # --------------------------- 19 | 20 | # This is the directory that contains 0_dress/karate, etc 21 | SYNTHETIC_DIR=/scratch1/wgan/results 22 | # --------------------------- 23 | 24 | MOSH_DIR=/scratch1/hmr_multiview/neutrMosh/ 25 | 26 | 27 | # --------------------------- 28 | # Run each command below from this directory. I advise running each one independently.
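# Each converter reads the raw data from the corresponding directory set above and writes
# TFRecords into its own subdirectory of $DATA_DIR (e.g. $DATA_DIR/lsp).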
29 | # --------------------------- 30 | # ----- LSP ----- 31 | # python -m src_ortho.datasets.lsp_to_tfrecords --img_directory $LSP_DIR --output_directory $DATA_DIR/lsp 32 | 33 | # ----- LSP-extended ----- 34 | # python -m src_ortho.datasets.lsp_to_tfrecords --img_directory $LSP_EXT_DIR --output_directory $DATA_DIR/lsp_ext 35 | 36 | # ----- MPII ----- 37 | # python -m src_ortho.datasets.mpii_to_tfrecords --img_directory $MPII_DIR --output_directory $DATA_DIR/mpii 38 | 39 | # ----- MPI-INF-3DHP ----- 40 | # python -m src_ortho.datasets.mpi_inf_3dhp_to_tfrecords --data_directory $MPI_INF_3DHP_DIR --output_directory $DATA_DIR/mpi_inf_3dhp 41 | # python -m src_ortho.datasets.mpi_inf_3dhp_to_tfrecords --split val --data_directory $MPI_INF_3DHP_DIR --output_directory $DATA_DIR/mpi_inf_3dhp 42 | # python -m src_ortho.datasets.mpi_inf_3dhp_test_to_tfrecords --data_directory $MPI_INF_3DHP_DIR --output_directory $DATA_DIR/mpi_inf_3dhp 43 | 44 | # ----- synthetic ----- 45 | python -m src_ortho.datasets.synthetic_to_tfrecords --data_directory $SYNTHETIC_DIR --output_directory $DATA_DIR/synthetic 46 | # python -m src_ortho.datasets.synthetic_to_tfrecords --split val --data_directory $SYNTHETIC_DIR --output_directory $DATA_DIR/synthetic 47 | 48 | # ----- COCO ----- 49 | # python -m src_ortho.datasets.coco_to_tfrecords --data_directory /scratch1/hmr_multiview/coco/data --output_directory $DATA_DIR/coco 50 | 51 | 52 | 53 | # ----- Mosh data, for each dataset ----- 54 | # CMU: 55 | # python -m src_ortho.datasets.smpl_to_tfrecords --dataset_name 'neutrSMPL_CMU' 56 | 57 | # # H3.6M: 58 | # python -m src_ortho.datasets.smpl_to_tfrecords --dataset_name 'neutrSMPL_H3.6' 59 | 60 | # # jointLim: 61 | # python -m src_ortho.datasets.smpl_to_tfrecords --dataset_name 'neutrSMPL_jointLim' 62 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # python requirements 2 | pip>=9.0 3 | scipy 4 | numpy 5 | opendr 6 | matplotlib 7 | scikit-image 8 | deepdish>=0.3 9 | opencv-python 10 | absl-py 11 | ipdb 12 | -------------------------------------------------------------------------------- /src_ortho/RunModel.py: -------------------------------------------------------------------------------- 1 | """ Evaluates a trained model using placeholders. """ 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | from os.path import join, exists, dirname 10 | 11 | from .tf_smpl import projection as proj_util 12 | from .tf_smpl.batch_smpl import SMPL 13 | from .models import get_encoder_fn_separate 14 | import deepdish as dd 15 | 16 | class RunModel(object): 17 | def __init__(self, config, num_views, num_true_views, sess=None): 18 | """ 19 | Args: 20 | config 21 | """ 22 | self.config = config 23 | self.load_path = config.load_path 24 | 25 | # Config + path 26 | if not config.load_path: 27 | raise Exception( 28 | "[!] You need to specify `load_path` to load a pretrained model" 29 | ) 30 | if not exists(config.load_path + '.index'): 31 | print('%s doesnt exist..'
% config.load_path) 32 | import ipdb 33 | ipdb.set_trace() 34 | 35 | # Data 36 | self.batch_size = config.batch_size 37 | self.img_size = config.img_size 38 | 39 | self.data_format = config.data_format 40 | self.smpl_model_path = config.smpl_model_path 41 | 42 | self.num_views = num_views 43 | self.num_true_views = num_true_views 44 | input_size = (self.batch_size, self.num_true_views, self.img_size, self.img_size, 3) 45 | self.images_pl = tf.placeholder(tf.float32, shape=input_size) 46 | 47 | # Model Settings 48 | self.num_stage = config.num_stage 49 | self.model_type = config.model_type 50 | self.joint_type = config.joint_type 51 | # Camera 52 | self.num_cam = 3 53 | self.proj_fn = proj_util.batch_orth_proj_idrot 54 | 55 | self.num_theta = 72 56 | # Theta size: camera (3) + pose (24*3) + shape (10) 57 | self.total_params = self.num_cam + self.num_theta + 10 58 | 59 | self.smpl = SMPL(self.smpl_model_path, joint_type=self.joint_type) 60 | 61 | # self.theta0_pl = tf.placeholder_with_default( 62 | # self.load_mean_param(), shape=[self.batch_size, self.total_params], name='theta0') 63 | # self.theta0_pl = tf.placeholder(tf.float32, shape=[None, self.total_params], name='theta0') 64 | 65 | self.build_test_model_ief() 66 | 67 | if sess is None: 68 | self.sess = tf.Session() 69 | else: 70 | self.sess = sess 71 | 72 | # Load data. 73 | self.saver = tf.train.Saver() 74 | self.prepare() 75 | 76 | 77 | def load_mean_param(self): 78 | mean = np.zeros((1, self.total_params)) 79 | # Initialize scale at 0.9 80 | mean[0, 0] = 0.9 81 | mean_path = join( 82 | dirname(self.smpl_model_path), 'neutral_smpl_mean_params.h5') 83 | mean_vals = dd.io.load(mean_path) 84 | 85 | mean_pose = mean_vals['pose'] 86 | # Ignore the global rotation. 87 | mean_pose[:3] = 0. 88 | mean_shape = mean_vals['shape'] 89 | 90 | # This initializes the global pose to be up-right when projected 91 | mean_pose[0] = np.pi 92 | 93 | mean[0, 3:] = np.hstack((mean_pose, mean_shape)) 94 | self.mean_np = mean[0] 95 | mean = tf.constant(mean, tf.float32) 96 | self.mean_var = tf.Variable( 97 | mean, name="mean_param", dtype=tf.float32, trainable=False) 98 | init_mean = tf.tile(self.mean_var, [1, 1]) 99 | return init_mean 100 | 101 | 102 | def build_test_model_ief(self): 103 | # Load mean value 104 | self.mean_var = self.load_mean_param() 105 | 106 | img_enc_fn, threed_enc_fn = get_encoder_fn_separate(self.model_type) 107 | # Extract image features. 
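# A single shared image encoder (reuse=True after the first view) maps each view's crop to a
# feature vector; indexing with i % num_true_views lets the extra view slots reuse real images
# when fewer than num_views views are supplied.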
108 | self.img_feat, self.E_var = [],[] 109 | for i in range(self.num_views): 110 | tmp0, tmp1 = img_enc_fn( 111 | self.images_pl[:,i%self.num_true_views,:,:], is_training=False, reuse=(i>0)) 112 | self.img_feat.append(tmp0) 113 | self.E_var.append(tmp1) 114 | self.E_var = self.E_var[0] 115 | 116 | # Start loop 117 | self.all_verts = [] 118 | self.all_kps = [] 119 | self.all_cams = [] 120 | self.all_Js = [] 121 | self.final_thetas = [] 122 | tmp = tf.tile(self.mean_var, [self.batch_size, 1]) 123 | theta_prev = [tmp for _ in range(self.num_views)] 124 | multiplier = 1 125 | for stage in np.arange(self.num_stage * multiplier): 126 | print('Iteration %d' % stage) 127 | for i in range(self.num_views): 128 | # ---- Compute outputs 129 | im1 = (i-1+self.num_views) % self.num_views 130 | theta_prev[i] = tf.concat([theta_prev[i][:, :self.num_cam+3], theta_prev[im1][:, self.num_cam+3:]], axis=1) 131 | state = tf.concat([self.img_feat[i], theta_prev[i]], 1) 132 | 133 | if i == 0 and stage == 0: 134 | delta_theta, _ = threed_enc_fn( 135 | state, 136 | num_output=self.total_params, 137 | is_training=False, 138 | reuse=False) 139 | else: 140 | delta_theta, _ = threed_enc_fn( 141 | state, 142 | num_output=self.total_params, 143 | is_training=False, 144 | reuse=True) 145 | 146 | # Compute new theta 147 | theta_here = theta_prev[i] + delta_theta 148 | theta_prev[i] = theta_here 149 | # use last pred as global pred 150 | for i in range(0, self.num_views): 151 | theta_here = tf.concat([theta_prev[i][:, :self.num_cam+3], theta_prev[0][:, self.num_cam+3:]], axis=1) 152 | cams = theta_here[:, :self.num_cam] 153 | poses = theta_here[:, self.num_cam:(self.num_cam + self.num_theta)] 154 | shapes = theta_here[:, (self.num_cam + self.num_theta):] 155 | verts, Js, _ = self.smpl(shapes, poses, get_skin=True) 156 | # Project to 2D! 157 | pred_kp = self.proj_fn(Js, cams, name='proj_2d_stage%d' % i) 158 | self.all_verts.append(verts) 159 | self.all_kps.append(pred_kp) 160 | self.all_cams.append(cams) 161 | self.all_Js.append(Js) 162 | # save each theta. 163 | self.final_thetas.append(theta_here) 164 | 165 | 166 | 167 | def prepare(self): 168 | print('Restoring checkpoint %s..' 
% self.load_path) 169 | self.saver.restore(self.sess, self.load_path) 170 | self.mean_value = self.sess.run(self.mean_var) 171 | 172 | def predict(self, images, get_theta=False): 173 | """ 174 | images: num_batch, img_size, img_size, 3 175 | Preprocessed to range [-1, 1] 176 | """ 177 | results = self.predict_dict(images) 178 | # thetas = results['theta'] 179 | # print('ori cam\n', self.mean_np[:3]) 180 | # print('ori rot\n', self.mean_np[3:6]) 181 | # print('ori pose\n', self.mean_np[6:75]) 182 | # print('ori shape\n', self.mean_np[75:]) 183 | # for i in range(12): 184 | # print('--------------------stage ',np.floor(i/4),' view ',i%4) 185 | # cam_prev = thetas[i-4][0] if i>=4 else self.mean_np 186 | # print('cam delta\n',(thetas[i][0]-cam_prev)[:3]) 187 | # print('rot delta\n',(thetas[i][0]-cam_prev)[3:6]) 188 | # param_prev = thetas[i-1][0] if i>=1 else self.mean_np 189 | # print('pose delta\n', (thetas[i][0]-param_prev)[6:75]) 190 | # print('shape delta\n', (thetas[i][0]-param_prev)[75:]) 191 | if get_theta: 192 | return results['joints'], results['verts'], results['cams'], results[ 193 | 'joints3d'], results['theta'] 194 | else: 195 | return results['joints'], results['verts'], results['cams'], results[ 196 | 'joints3d'] 197 | 198 | def predict_dict(self, images): 199 | """ 200 | images: num_batch, img_size, img_size, 3 201 | Preprocessed to range [-1, 1] 202 | Runs the model with images. 203 | """ 204 | feed_dict = { 205 | self.images_pl: images, 206 | # self.theta0_pl: self.mean_var, 207 | } 208 | pad = self.num_views-self.num_true_views 209 | st = self.num_views 210 | en = pad 211 | if en != 0: 212 | fetch_dict = { 213 | 'joints': self.all_kps[-st:-en], 214 | 'verts': self.all_verts[-st:-en], 215 | 'cams': self.all_cams[-st:-en], 216 | 'joints3d': self.all_Js[-st:-en], 217 | 'theta': self.final_thetas[-st:-en], 218 | } 219 | else: 220 | fetch_dict = { 221 | 'joints': self.all_kps[-st:], 222 | 'verts': self.all_verts[-st:], 223 | 'cams': self.all_cams[-st:], 224 | 'joints3d': self.all_Js[-st:], 225 | 'theta': self.final_thetas[-st:], 226 | } 227 | 228 | results = self.sess.run(fetch_dict, feed_dict) 229 | 230 | # Return joints in original image space. 
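# Keypoints are predicted in the normalized [-1, 1] crop frame; map them back to pixel
# coordinates of the img_size x img_size input via px = (x + 1) * 0.5 * img_size.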
231 | joints = np.array(results['joints']) 232 | results['joints'] = ((joints + 1) * 0.5) * self.img_size 233 | #print(results['theta']) 234 | #results['theta'] = results['theta'][0:1] 235 | return results 236 | -------------------------------------------------------------------------------- /src_ortho/RunModel.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/RunModel.pyc -------------------------------------------------------------------------------- /src_ortho/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/__init__.py -------------------------------------------------------------------------------- /src_ortho/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/__init__.pyc -------------------------------------------------------------------------------- /src_ortho/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/benchmark/__init__.py -------------------------------------------------------------------------------- /src_ortho/benchmark/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/benchmark/__init__.pyc -------------------------------------------------------------------------------- /src_ortho/benchmark/eval_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utils for evaluation. 3 | """ 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import scipy 11 | from scipy.spatial.distance import directed_hausdorff 12 | 13 | 14 | def compute_similarity_transform(S1, S2, verts1=None): 15 | ''' 16 | Computes a similarity transform (sR, t) that takes 17 | a set of 3D points S1 (3 x N) closest to a set of 3D points S2, 18 | where R is an 3x3 rotation matrix, t 3x1 translation, s scale. 19 | i.e. solves the orthogonal Procrutes problem. 20 | ''' 21 | if verts1 is None: 22 | verts1 = S1 23 | transposed = False 24 | if S1.shape[0] != 3 and S1.shape[0] != 2: 25 | S1 = S1.T 26 | verts1 = verts1.T 27 | S2 = S2.T 28 | transposed = True 29 | assert(S2.shape[1] == S1.shape[1]) 30 | 31 | # 1. Remove mean. 32 | mu1 = S1.mean(axis=1, keepdims=True) 33 | mu2 = S2.mean(axis=1, keepdims=True) 34 | X1 = S1 - mu1 35 | X2 = S2 - mu2 36 | 37 | # 2. Compute variance of X1 used for scale. 38 | var1 = np.sum(X1**2) 39 | 40 | # 3. The outer product of X1 and X2. 41 | K = X1.dot(X2.T) 42 | 43 | # 4. Solution that Maximizes trace(R'K) is R=U*V', where U, V are 44 | # singular vectors of K. 45 | U, s, Vh = np.linalg.svd(K) 46 | V = Vh.T 47 | # Construct Z that fixes the orientation of R to get det(R)=1. 48 | Z = np.eye(U.shape[0]) 49 | Z[-1, -1] *= np.sign(np.linalg.det(U.dot(V.T))) 50 | # Construct R. 51 | R = V.dot(Z.dot(U.T)) 52 | 53 | # 5. Recover scale. 
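# Procrustes-optimal scale given R: s = trace(R K) / var1, with K = X1 X2^T and var1 = sum(X1**2).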
54 | scale = np.trace(R.dot(K)) / var1 55 | 56 | # 6. Recover translation. 57 | t = mu2 - scale*(R.dot(mu1)) 58 | 59 | # 7. Error: 60 | S1_hat = scale*R.dot(S1) + t 61 | verts1_hat = scale*R.dot(verts1) + t 62 | 63 | if transposed: 64 | S1_hat = S1_hat.T 65 | verts1_hat = verts1_hat.T 66 | 67 | return S1_hat, verts1_hat 68 | 69 | 70 | def align_by_pelvis(joints, get_pelvis=False): 71 | """ 72 | Assumes joints is 14 x 3 in LSP order. 73 | Then hips are: [3, 2] 74 | Takes mid point of these points, then subtracts it. 75 | """ 76 | left_id = 3 77 | right_id = 2 78 | 79 | pelvis = (joints[left_id, :] + joints[right_id, :]) / 2. 80 | if get_pelvis: 81 | return joints - np.expand_dims(pelvis, axis=0), pelvis 82 | else: 83 | return joints - np.expand_dims(pelvis, axis=0) 84 | 85 | 86 | def compute_errors(gt3ds, preds, gtverts=None, pdverts=None, pfverts=None): 87 | """ 88 | Gets MPJPE after pelvis alignment + MPJPE after Procrustes. 89 | Evaluates on the 14 common joints. 90 | Inputs: 91 | - gt3ds: N x views * 14 x 3 92 | - preds: N x views * 14 x 3 93 | """ 94 | # TODO: compute hausdorff distance using pose and shape 95 | # 96 | # directed_hausdorff(u, v)[0] 97 | errors, errors_pa, error_haus, error_pa_haus = [], [], [], [] 98 | error_pf_haus = [] 99 | pcks, aucs, pcks_pa, aucs_pa = [], [], [], [] 100 | print(gt3ds.shape, preds.shape) 101 | assert gt3ds.shape == preds.shape 102 | num_views = gt3ds.shape[1] 103 | for i, (gt3d_, pred_) in enumerate(zip(gt3ds, preds)): 104 | for j in range(num_views): 105 | gt3d = gt3d_[j].reshape(-1, 3) 106 | # Root align. 107 | gt3d = align_by_pelvis(gt3d) 108 | pred3d = align_by_pelvis(pred_[j]) 109 | 110 | joint_error = np.sqrt(np.sum((gt3d - pred3d)**2, axis=1)) 111 | errors.append(np.mean(joint_error)) 112 | if gtverts is not None: 113 | u = gtverts[i, j] 114 | v = pdverts[i, j] 115 | w = pfverts[i, j] 116 | haus_err = max(directed_hausdorff(u, v)[0], directed_hausdorff(v, u)[0]) 117 | pf_haus = max(directed_hausdorff(u, w)[0], directed_hausdorff(w, u)[0]) 118 | error_haus.append(haus_err) 119 | error_pf_haus.append(pf_haus) 120 | else: 121 | v = None 122 | auc = 0 123 | for k in range(1, 151, 5): 124 | pck = np.sum(joint_error <= k) / 14.0 * 100 125 | auc += pck / 30.0 126 | aucs.append(auc) 127 | pcks.append(pck) 128 | 129 | # Get PA error. 
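# PA (Procrustes-aligned) error: rigidly align the prediction to the ground truth with the
# similarity transform above (rotation + translation + scale), then recompute per-joint distances.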
130 | pred3d_sym, v = compute_similarity_transform(pred3d, gt3d, v) 131 | pa_error = np.sqrt(np.sum((gt3d - pred3d_sym)**2, axis=1)) 132 | errors_pa.append(np.mean(pa_error)) 133 | if gtverts is not None: 134 | u = gtverts[i, j] 135 | haus_pa_err = max(directed_hausdorff(u, v)[0], directed_hausdorff(v, u)[0]) 136 | error_pa_haus.append(haus_pa_err) 137 | else: 138 | auc = 0 139 | for k in range(1, 151, 5): 140 | pck = np.sum(pa_error <= k) / 14.0 * 100 141 | auc += pck / 30.0 142 | aucs_pa.append(auc) 143 | pcks_pa.append(pck) 144 | 145 | if gtverts is None: 146 | return errors, errors_pa, pcks, aucs, pcks_pa, aucs_pa 147 | else: 148 | return errors, errors_pa, error_haus, error_pa_haus, error_pf_haus 149 | -------------------------------------------------------------------------------- /src_ortho/benchmark/eval_util.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/benchmark/eval_util.pyc -------------------------------------------------------------------------------- /src_ortho/benchmark/evaluate_h36m_multi.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation of H3.6M. 3 | 4 | Sample call from hmr: 5 | python -m src.benchmark.evaluate_h36m --batch_size=500 --load_path= 6 | python -m src.benchmark.evaluate_h36m --batch_size=500 --load_path=/home/kanazawa/projects/hmr_v2/models/model.ckpt-667589 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import itertools 14 | from absl import flags 15 | import numpy as np 16 | import deepdish as dd 17 | from time import time 18 | from os.path import exists, join, expanduser, split 19 | from os import makedirs 20 | 21 | import tensorflow as tf 22 | 23 | from src_ortho.config import get_config 24 | from ..util import renderer as vis_util 25 | from ..RunModel import RunModel 26 | from .eval_util import compute_errors 27 | from ..datasets.common import read_images_from_tfrecords 28 | 29 | kPredDir = '/tmp/hmr_output' 30 | # Change to where you saved your tfrecords 31 | kTFDataDir = '/scratch1/williamljb/hmr_multiview/tf_datasets/human36m_multi' 32 | 33 | flags.DEFINE_string('pred_dir', kPredDir, 34 | 'where to save model output of h36m') 35 | flags.DEFINE_string('tfh36m_dir', kTFDataDir, 36 | 'data dir: top of h36m in tf_records') 37 | flags.DEFINE_integer( 38 | 'protocol', 1, 39 | 'If 2, then only frontal cam (3) and trial 1, if 1, then all camera & trials' 40 | ) 41 | flags.DEFINE_boolean( 42 | 'vis', False, 'If true, visualizes the best and worst 30 results.') 43 | 44 | model = None 45 | sess = None 46 | # For visualization. 
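# The renderer is only constructed in main() when --vis is set; extreme_errors / contents
# accumulate the worst and best examples for plotting.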
47 | renderer = None 48 | extreme_errors, contents = [], [] 49 | import os 50 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 51 | 52 | 53 | # -- Draw Utils --- 54 | def draw_content(content, config): 55 | global renderer 56 | input_img = content['image'] 57 | vert = content['vert'] 58 | joint = content['joint'] 59 | cam = content['cam'] 60 | img_size = config.img_size 61 | # Undo preprocessing 62 | img = (input_img + 1) * 0.5 * 255 63 | tz = renderer.flength / (0.5 * img_size * cam[0]) 64 | trans = np.hstack([cam[1:], tz]) 65 | vert_shifted = vert + trans 66 | # Draw 67 | skel_img = vis_util.draw_skeleton(img, joint) 68 | rend_img = renderer(vert_shifted, img_size=(img_size, img_size)) 69 | another_vp = renderer.rotated(vert_shifted, 90, img_size=(img_size, img_size), do_alpha=False) 70 | another_vp = vis_util.draw_text(another_vp, {"diff_viewpoint": 90}) 71 | 72 | tog0 = np.hstack((img, rend_img)) 73 | tog1 = np.hstack((skel_img, another_vp)) 74 | 75 | all_img = np.vstack((tog0, tog1)).astype(np.uint8) 76 | # import matplotlib.pyplot as plt 77 | # plt.ion() 78 | # plt.figure(1) 79 | # plt.clf() 80 | # plt.imshow(all_img) 81 | # plt.axis('off') 82 | # import ipdb; ipdb.set_trace() 83 | 84 | return all_img 85 | 86 | 87 | # -- Utils --- 88 | def get_pred_dir(base_dir, load_path): 89 | bpath, checkpt_name = split(load_path) 90 | bpath, model_name = split(bpath) 91 | _, log_name = split(bpath) 92 | pred_dir = join( 93 | expanduser(base_dir), '-'.join([log_name, model_name, checkpt_name])) 94 | return pred_dir 95 | 96 | 97 | def get_h36m_seqs(protocol=2): 98 | action_names = [ 99 | 'Directions', 'Discussion', 'Eating', 'Greeting', 'Phoning', 'Posing', 100 | 'Purchases', 'Sitting', 'SittingDown', 'Smoking', 'TakingPhoto', 101 | 'Waiting', 'Walking', 'WakingDog', 'WalkTogether' 102 | ] 103 | print('Protocol %d!!' % protocol) 104 | if protocol == 2: 105 | trial_ids = [0] 106 | else: 107 | trial_ids = [0, 1] 108 | 109 | sub_ids = [9, 11] 110 | all_pairs = [ 111 | p 112 | for p in list( 113 | itertools.product(*[sub_ids, action_names, trial_ids])) 114 | ] 115 | # Corrupt mp4 file 116 | all_pairs = [p for p in all_pairs if p != (11, 'Directions', 1)] 117 | 118 | return all_pairs, action_names 119 | 120 | 121 | # -- Core: --- 122 | def get_data(seq_name, config): 123 | """ 124 | Read preprocessed image from tfrecords. 125 | """ 126 | global sess 127 | if sess is None: 128 | sess = tf.Session() 129 | 130 | tf_path = join( 131 | expanduser(config.tfh36m_dir), 'test', seq_name + '.tfrecord') 132 | images, kps, gt3ds, _, _ = read_images_from_tfrecords( 133 | tf_path, 4, img_size=config.img_size, sess=sess) 134 | 135 | return images, gt3ds 136 | 137 | 138 | def run_model(images, config): 139 | """ 140 | Runs trained model to get predictions on each seq. 141 | """ 142 | global model, sess 143 | # Setup model with this config. 144 | num_views = 4 145 | if model is None: 146 | model = RunModel(config, num_views, 4, sess=sess) 147 | 148 | N = len(images) 149 | all_joints, all_verts, all_cams, all_joints3d, all_thetas = [], [], [], [], [] 150 | 151 | # Batch + preprocess.. 
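# The input placeholder has a fixed batch dimension, so the final partial batch is padded with
# zero images and every output is trimmed back to num_here below.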
152 | batch_size = config.batch_size 153 | num_total_batches = int(np.ceil(float(N) / batch_size)) 154 | for b in xrange(num_total_batches): 155 | print('Batch %d/%d' % (b, num_total_batches)) 156 | start_ind = b * batch_size 157 | end_ind = (b + 1) * batch_size 158 | images_here = images[start_ind:end_ind] 159 | 160 | if end_ind > N: 161 | end_ind = N 162 | # Need to pad dummy bc batch size is not dynamic,, 163 | num_here = images_here.shape[0] 164 | images_wdummy = np.vstack([ 165 | images_here, 166 | np.zeros((batch_size - num_here, 4, config.img_size, 167 | config.img_size, 3)) 168 | ]) 169 | joints, verts, cams, joints3d, thetas = model.predict( 170 | images_wdummy, get_theta=True) 171 | joints = [joint[:num_here] for joint in joints] 172 | verts = [vert[:num_here] for vert in verts] 173 | cams = [cam[:num_here] for cam in cams] 174 | joints3d = [joints3[:num_here] for joints3 in joints3d] 175 | thetas = [theta[:num_here] for theta in thetas] 176 | else: 177 | joints, verts, cams, joints3d, thetas = model.predict( 178 | images_here, get_theta=True) 179 | 180 | all_joints.append(joints) 181 | all_verts.append(verts) 182 | all_cams.append(cams) 183 | all_joints3d.append(joints3d) 184 | all_thetas.append(thetas) 185 | 186 | preds = { 187 | 'verts': np.hstack(all_verts), 188 | 'cams': np.hstack(all_cams), 189 | 'joints': np.hstack(all_joints), 190 | 'joints3d': np.hstack(all_joints3d), 191 | 'thetas': np.hstack(all_thetas) 192 | } 193 | 194 | # Check output. 195 | # for i in xrange(10): 196 | # content = { 197 | # 'vert': preds['verts'][i], 198 | # 'joint': preds['joints'][i], 199 | # 'image': images[i], 200 | # 'cam': preds['cams'][i], 201 | # } 202 | # rend_img = draw_content(content, config) 203 | 204 | return preds 205 | 206 | 207 | def add_visuals(errors, results, images): 208 | global extreme_errors, contents 209 | # Record extreme ones 210 | sort_inds = np.argsort(errors)[::-1] 211 | # Save top/worst 10. 212 | for i in xrange(10): 213 | ind = sort_inds[i] 214 | content = { 215 | 'vert': results['verts'][ind], 216 | 'joint': results['joints'][ind], 217 | 'image': images[ind][0], 218 | 'cam': results['cams'][ind], 219 | } 220 | extreme_errors.append(errors[ind]) 221 | contents.append(content) 222 | # Save best too. 223 | best_ind = sort_inds[-(i + 1)] 224 | content = { 225 | 'vert': results['verts'][best_ind], 226 | 'joint': results['joints'][best_ind], 227 | 'image': images[best_ind][0], 228 | 'cam': results['cams'][best_ind], 229 | } 230 | extreme_errors.append(errors[best_ind]) 231 | contents.append(content) 232 | 233 | 234 | def evaluate_sequence(seq_info, pred_dir): 235 | sub_id, action, trial_id = seq_info 236 | 237 | seq_name = 'S%d/%s_%d' % (sub_id, action, trial_id) 238 | file_seq_name = 'S%d_%s_%d' % (sub_id, action, trial_id) 239 | print('%s' % (seq_name)) 240 | 241 | save_path = join(pred_dir, file_seq_name + '_pred.h5') 242 | if exists(save_path): 243 | results = dd.io.load(save_path) 244 | errors = results['errors'] 245 | errors_pa = results['errors_pa'] 246 | if config.vis: 247 | # Need to load images too.. 248 | images, gt3ds = get_data(file_seq_name, config) 249 | pred3ds = np.array(results['joints3d'])[:, :, :14, :].transpose((1, 0, 2, 3)) 250 | else: 251 | # Run the model! 252 | t0 = time() 253 | images, gt3ds = get_data(file_seq_name, config) 254 | 255 | results = run_model(images, config) 256 | t1 = time() 257 | print('Took %g sec for %d imgs' % (t1 - t0, len(results['verts']))) 258 | 259 | # Evaluate! 260 | # Joints 3D is COCOplus format now. 
First 14 is H36M joints 261 | pred3ds = np.array(results['joints3d'])[:, :, :14, :].transpose((1, 0, 2, 3)) 262 | # Convert to mm! 263 | errors, errors_pa,_,_,_,_ = compute_errors(gt3ds * 1000., pred3ds * 1000.) 264 | 265 | results['errors'] = errors 266 | results['errors_pa'] = errors_pa 267 | # Save results 268 | dd.io.save(save_path, results) 269 | 270 | if config.vis: 271 | add_visuals(errors, results, images) 272 | 273 | return errors, errors_pa 274 | 275 | 276 | def main(config): 277 | # Figure out the save name. 278 | pred_dir = get_pred_dir(config.pred_dir, config.load_path) 279 | protocol = config.protocol 280 | pred_dir += 'h36m_multi' 281 | print('\n***\nsaving predictions in %s\n***\n' % pred_dir) 282 | 283 | if not exists(pred_dir): 284 | makedirs(pred_dir) 285 | 286 | if config.vis: 287 | global renderer 288 | # Bad impl with global.. 289 | global extreme_errors, contents 290 | renderer = vis_util.SMPLRenderer( 291 | img_size=config.img_size, face_path=config.smpl_face_path) 292 | 293 | all_pairs, actions = get_h36m_seqs(protocol) 294 | 295 | all_errors = {} 296 | all_errors_pa = {} 297 | raw_errors, raw_errors_pa = [], [] 298 | for itr, seq_info in enumerate(all_pairs): 299 | print('%d/%d' % (itr, len(all_pairs))) 300 | sub_id, action, trial_id = seq_info 301 | errors, errors_pa = evaluate_sequence(seq_info, pred_dir) 302 | mean_error = np.mean(errors) 303 | mean_error_pa = np.mean(errors_pa) 304 | med_error = np.median(errors) 305 | raw_errors.append(errors) 306 | raw_errors_pa.append(errors_pa) 307 | print('====================') 308 | print('mean error: %g, median: %g, PA mean: %g' % (mean_error, med_error, mean_error_pa)) 309 | raws = np.hstack(raw_errors) 310 | raws_pa = np.hstack(raw_errors_pa) 311 | print('Running average - mean: %g, median: %g' % (np.mean(raws), 312 | np.median(raws))) 313 | print('Running average - PA mean: %g, median: %g' % 314 | (np.mean(raws_pa), np.median(raws_pa))) 315 | print('====================') 316 | if action in all_errors.keys(): 317 | all_errors[action].append(mean_error) 318 | all_errors_pa[action].append(mean_error_pa) 319 | else: 320 | all_errors[action] = [mean_error] 321 | all_errors_pa[action] = [mean_error_pa] 322 | 323 | # all_act_errors = [] 324 | # all_act_errors_pa = [] 325 | # for act in actions: 326 | # print('%s mean error %g, PA error %g' % (act, np.mean(all_errors[act]), 327 | # np.mean(all_errors_pa[act]))) 328 | # all_act_errors.append(np.mean(all_errors[act])) 329 | # all_act_errors_pa.append(np.mean(all_errors_pa[act])) 330 | 331 | # print('--for %s--' % config.load_path) 332 | # print('Average error over all seq (over action) 3d: %g, PA: %g' % 333 | # (np.mean(all_act_errors), np.mean(all_act_errors_pa))) 334 | 335 | # act_names_in_order = [ 336 | # 'Directions', 'Discussion', 'Eating', 'Greeting', 'Phoning', 337 | # 'TakingPhoto', 'Posing', 'Purchases', 'Sitting', 'SittingDown', 338 | # 'Smoking', 'Waiting', 'WakingDog', 'Walking', 'WalkTogether' 339 | # ] 340 | # act_error = [ 341 | # '%.2f' % np.mean(all_errors[act]) for act in act_names_in_order 342 | # ] 343 | # act_PA_error = [ 344 | # '%.2f' % np.mean(all_errors_pa[act]) for act in act_names_in_order 345 | # ] 346 | 347 | # act_names_in_order.append('Average') 348 | # act_error.append('%.2f' % np.mean(all_act_errors)) 349 | # act_PA_error.append('%.2f' % np.mean(all_act_errors_pa)) 350 | # print('---for excel---') 351 | # print(', '.join(act_names_in_order)) 352 | # print(', '.join(act_error)) 353 | # print('With Alignment:') 354 | # print(', 
'.join(act_PA_error)) 355 | 356 | err_pa = np.hstack(raw_errors_pa) 357 | MPJPE = np.mean(np.hstack(raw_errors)) 358 | PA_MPJPE = np.mean(err_pa) 359 | print('Average error over all joints 3d: %g, PA: %g' % (MPJPE, PA_MPJPE)) 360 | 361 | err = np.hstack(raw_errors) 362 | median = np.median(np.hstack(raw_errors)) 363 | pa_median = np.median(np.hstack(err_pa)) 364 | print( 365 | 'Percentiles 90th: %.1f 70th: %.1f 50th: %.1f 30th: %.1f 10th: %.1f' % 366 | (np.percentile(err, 90), np.percentile(err, 70), 367 | np.percentile(err, 50), np.percentile(err, 30), 368 | np.percentile(err, 10))) 369 | 370 | print('MPJPE: %.2f, PA-MPJPE: %.2f, Median: %.2f, PA-Median: %.2f' % 371 | (MPJPE, PA_MPJPE, median, pa_median)) 372 | 373 | if config.vis: 374 | global extreme_errors, contents 375 | import matplotlib.pyplot as plt 376 | # plt.ion() 377 | plt.figure(1) 378 | plt.clf() 379 | sort_inds = np.argsort(extreme_errors)[::-1] 380 | for i in xrange(30): 381 | bad_ind = sort_inds[i] 382 | bad_error = extreme_errors[bad_ind] 383 | bad_img = draw_content(contents[bad_ind], config) 384 | plt.figure(1) 385 | plt.clf() 386 | plt.imshow(bad_img) 387 | plt.axis('off') 388 | plt.title('%d-th worst, mean error %.2fmm' % (i, bad_error)) 389 | 390 | good_ind = sort_inds[-(i+1)] 391 | good_error = extreme_errors[good_ind] 392 | good_img = draw_content(contents[good_ind], config) 393 | plt.figure(2) 394 | plt.clf() 395 | plt.imshow(good_img) 396 | plt.axis('off') 397 | plt.title('%d-th best, mean error %.2fmm' % (i, good_error)) 398 | plt.draw() 399 | plt.show() 400 | # import ipdb; ipdb.set_trace() 401 | 402 | 403 | 404 | if __name__ == '__main__': 405 | config = get_config() 406 | if not config.load_path: 407 | raise Exception('Must specify a model to use to predict!') 408 | if 'model.ckpt' not in config.load_path: 409 | raise Exception('Must specify a model checkpoint!') 410 | main(config) 411 | -------------------------------------------------------------------------------- /src_ortho/benchmark/evaluate_mpi3dhp_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation of H3.6M. 
3 | 4 | Sample call from hmr: 5 | python -m src.benchmark.evaluate_mpi3dhp --batch_size=50 --load_path= 6 | python -m src.benchmark.evaluate_h36m --batch_size=500 --load_path=/home/kanazawa/projects/hmr_v2/models/model.ckpt-667589 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import itertools 14 | from absl import flags 15 | import numpy as np 16 | import deepdish as dd 17 | from time import time 18 | from os.path import exists, join, expanduser, split 19 | from os import makedirs 20 | 21 | import tensorflow as tf 22 | 23 | from src_ortho.config import get_config 24 | from ..util import renderer as vis_util 25 | from ..RunModel import RunModel 26 | from .eval_util import compute_errors, align_by_pelvis 27 | from ..datasets.common import read_images_from_tfrecords 28 | 29 | kPredDir = '/tmp/hmr_output' 30 | # Change to where you saved your tfrecords 31 | kTFDataDir = '/scratch1/williamljb/hmr_multiview/tf_datasets/mpi_inf_3dhp' 32 | 33 | flags.DEFINE_string('pred_dir', kPredDir, 34 | 'where to save model output of h36m') 35 | flags.DEFINE_string('tfh36m_dir', kTFDataDir, 36 | 'data dir: top of h36m in tf_records') 37 | flags.DEFINE_integer( 38 | 'protocol', 1, 39 | 'If 2, then only frontal cam (3) and trial 1, if 1, then all camera & trials' 40 | ) 41 | flags.DEFINE_boolean( 42 | 'vis', False, 'If true, visualizes the best and worst 30 results.') 43 | 44 | model = None 45 | sess = None 46 | # For visualization. 47 | renderer = None 48 | extreme_errors, contents = [], [] 49 | import os 50 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 51 | 52 | 53 | # -- Draw Utils --- 54 | def draw_content(content, config, ind): 55 | global renderer 56 | input_img = content['image'][ind] 57 | vert = content['vert'][ind] 58 | joint = content['joint'][ind] 59 | cam = content['cam'][ind] 60 | img_size = config.img_size 61 | kp = ((content['kp'][ind] + 1) * 0.5) * img_size 62 | # Undo preprocessing 63 | img = (input_img + 1) * 0.5 * 255 64 | tz = renderer.flength / (0.5 * img_size * cam[0]) 65 | trans = np.hstack([cam[1:], tz]) 66 | vert_shifted = vert + trans 67 | print('----------------------------------------') 68 | print(align_by_pelvis(content['pred3ds'][ind]) - align_by_pelvis(content['gt3ds'][ind])) 69 | # Draw 70 | skel_img = vis_util.draw_skeleton(img, joint) 71 | img = vis_util.draw_skeleton(img, kp, draw_edges=False) 72 | rend_img = renderer(vert_shifted, img_size=(img_size, img_size)) 73 | another_vp = renderer.rotated(vert_shifted, 90, img_size=(img_size, img_size), do_alpha=False) 74 | another_vp = vis_util.draw_text(another_vp, {"diff_viewpoint": 90}) 75 | 76 | tog0 = np.hstack((img, rend_img)) 77 | tog1 = np.hstack((skel_img, another_vp)) 78 | 79 | all_img = np.vstack((tog0, tog1)).astype(np.uint8) 80 | # import matplotlib.pyplot as plt 81 | # plt.ion() 82 | # plt.figure(1) 83 | # plt.clf() 84 | # plt.imshow(all_img) 85 | # plt.axis('off') 86 | # import ipdb; ipdb.set_trace() 87 | 88 | return all_img 89 | 90 | 91 | # -- Utils --- 92 | def get_pred_dir(base_dir, load_path): 93 | bpath, checkpt_name = split(load_path) 94 | bpath, model_name = split(bpath) 95 | _, log_name = split(bpath) 96 | pred_dir = join( 97 | expanduser(base_dir), '-'.join([log_name, model_name, checkpt_name])) 98 | return pred_dir 99 | 100 | 101 | def get_seqs(): 102 | st = 0 103 | l = 6 104 | all_pairs = [('%04d'%i) for i in range(st, st+l)] 105 | 106 | return all_pairs 107 | 108 | 109 | # -- Core: --- 110 | def get_data(seq_name, 
config): 111 | """ 112 | Read preprocessed image from tfrecords. 113 | """ 114 | global sess 115 | if sess is None: 116 | sess = tf.Session() 117 | 118 | tf_path = join( 119 | expanduser(config.tfh36m_dir), 'test', 'test_' + seq_name + '.tfrecord') 120 | images, kps, gt3ds, _, _ = read_images_from_tfrecords( 121 | tf_path, 4, img_size=config.img_size, sess=sess) 122 | 123 | return images, kps, gt3ds 124 | 125 | 126 | def run_model(images, config): 127 | """ 128 | Runs trained model to get predictions on each seq. 129 | """ 130 | global model, sess 131 | # Setup model with this config. 132 | num_views = 4 133 | if model is None: 134 | model = RunModel(config, num_views, 4, sess=sess) 135 | 136 | N = len(images) 137 | all_joints, all_verts, all_cams, all_joints3d, all_thetas = [], [], [], [], [] 138 | 139 | # Batch + preprocess.. 140 | batch_size = config.batch_size 141 | num_total_batches = int(np.ceil(float(N) / batch_size)) 142 | for b in xrange(num_total_batches): 143 | print('Batch %d/%d' % (b, num_total_batches)) 144 | start_ind = b * batch_size 145 | end_ind = (b + 1) * batch_size 146 | images_here = images[start_ind:end_ind] 147 | 148 | if end_ind > N: 149 | end_ind = N 150 | # Need to pad dummy bc batch size is not dynamic,, 151 | num_here = images_here.shape[0] 152 | images_wdummy = np.vstack([ 153 | images_here, 154 | np.zeros((batch_size - num_here, 4, config.img_size, 155 | config.img_size, 3)) 156 | ]) 157 | joints, verts, cams, joints3d, thetas = model.predict( 158 | images_wdummy, get_theta=True) 159 | joints = [joint[:num_here] for joint in joints] 160 | verts = [vert[:num_here] for vert in verts] 161 | cams = [cam[:num_here] for cam in cams] 162 | joints3d = [joints3[:num_here] for joints3 in joints3d] 163 | thetas = [theta[:num_here] for theta in thetas] 164 | else: 165 | joints, verts, cams, joints3d, thetas = model.predict( 166 | images_here, get_theta=True) 167 | 168 | all_joints.append(joints) 169 | all_verts.append(verts) 170 | all_cams.append(cams) 171 | all_joints3d.append(joints3d) 172 | all_thetas.append(thetas) 173 | 174 | # view * B * size 175 | preds = { 176 | 'verts': np.hstack(all_verts), 177 | 'cams': np.hstack(all_cams), 178 | 'joints': np.hstack(all_joints), 179 | 'joints3d': np.hstack(all_joints3d), 180 | 'thetas': np.hstack(all_thetas) 181 | } 182 | 183 | # Check output. 184 | # for i in xrange(10): 185 | # content = { 186 | # 'vert': preds['verts'][i], 187 | # 'joint': preds['joints'][i], 188 | # 'image': images[i], 189 | # 'cam': preds['cams'][i], 190 | # } 191 | # rend_img = draw_content(content, config) 192 | 193 | return preds 194 | 195 | 196 | def add_visuals(errors, results, images, kps, pred3ds, gt3ds): 197 | global extreme_errors, contents 198 | # Record extreme ones 199 | sort_inds = np.argsort(errors)[::-1] 200 | # Save top/worst 10. 201 | for i in xrange(10): 202 | ind = sort_inds[i] 203 | indd4 = ind 204 | content = { 205 | 'vert': [results['verts'][view][indd4] for view in range(4)], 206 | 'joint': [results['joints'][view][indd4] for view in range(4)], 207 | 'image': [images[indd4][view] for view in range(4)], 208 | 'kp': [kps[indd4][view][:,:2] for view in range(4)], 209 | 'pred3ds': [pred3ds[indd4][view] for view in range(4)], 210 | 'gt3ds': [gt3ds[indd4][view] for view in range(4)], 211 | 'cam': [results['cams'][view][indd4] for view in range(4)], 212 | 'err': [errors[indd4] for view in range(4)], 213 | } 214 | extreme_errors.append(errors[ind]) 215 | contents.append(content) 216 | # Save best too. 
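# Mirror of the worst-case block above, but for the i-th best example: sort_inds is in
# descending error order, so the smallest errors are indexed from the end.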
217 | best_ind = sort_inds[-(i + 1)] 218 | best_indd4 = best_ind 219 | content = { 220 | 'vert': [results['verts'][view][best_indd4] for view in range(4)], 221 | 'joint': [results['joints'][view][best_indd4] for view in range(4)], 222 | 'image': [images[best_indd4][view] for view in range(4)], 223 | 'kp': [kps[best_indd4][view][:,:2] for view in range(4)], 224 | 'pred3ds': [pred3ds[best_indd4][view] for view in range(4)], 225 | 'gt3ds': [gt3ds[best_indd4][view] for view in range(4)], 226 | 'cam': [results['cams'][view][best_indd4] for view in range(4)], 227 | 'err': [errors[best_indd4] for view in range(4)], 228 | } 229 | extreme_errors.append(errors[best_ind]) 230 | contents.append(content) 231 | 232 | 233 | def evaluate_sequence(seq_info, pred_dir): 234 | print('%s' % (seq_info)) 235 | 236 | save_path = join(pred_dir, seq_info + '_pred.h5') 237 | if exists(save_path): 238 | results = dd.io.load(save_path) 239 | errors = results['errors'] 240 | errors_pa = results['errors_pa'] 241 | pcks = results['pcks'] 242 | pcks_pa = results['pcks_pa'] 243 | aucs = results['aucs'] 244 | aucs_pa = results['aucs_pa'] 245 | if config.vis: 246 | # Need to load images too.. 247 | images, kps, gt3ds = get_data(seq_info, config) 248 | pred3ds = np.array(results['joints3d'])[:, :, :14, :].transpose((1, 0, 2, 3)) 249 | else: 250 | # Run the model! 251 | t0 = time() 252 | images, kps, gt3ds = get_data(seq_info, config) 253 | 254 | results = run_model(images, config) 255 | t1 = time() 256 | print('Took %g sec for %d imgs' % (t1 - t0, len(results['verts'][0]))) 257 | 258 | # Evaluate! 259 | # Joints 3D is COCOplus format now. First 14 is H36M joints 260 | pred3ds = np.array(results['joints3d'])[:, :, :14, :].transpose((1, 0, 2, 3)) 261 | # Convert to mm! 262 | errors, errors_pa, pcks, aucs, pcks_pa, aucs_pa = compute_errors(gt3ds * 1000., pred3ds * 1000.) 263 | # errors = [np.min(errors[i*4:i*4+4]) for i in range(int(len(errors) / 4))] 264 | # errors_pa = [np.min(errors_pa[i*4:i*4+4]) for i in range(int(len(errors) / 4))] 265 | 266 | results['errors'] = errors 267 | results['errors_pa'] = errors_pa 268 | results['pcks'] = pcks 269 | results['pcks_pa'] = pcks_pa 270 | results['aucs'] = aucs 271 | results['aucs_pa'] = aucs_pa 272 | # Save results 273 | dd.io.save(save_path, results) 274 | 275 | if config.vis: 276 | add_visuals(errors, results, images, kps, pred3ds, gt3ds) 277 | 278 | return errors, errors_pa, pcks, aucs, pcks_pa, aucs_pa 279 | 280 | 281 | def main(config): 282 | # Figure out the save name. 283 | pred_dir = get_pred_dir(config.pred_dir, config.load_path) 284 | pred_dir += '3dhp_test' 285 | print('\n***\nsaving predictions in %s\n***\n' % pred_dir) 286 | 287 | if not exists(pred_dir): 288 | makedirs(pred_dir) 289 | 290 | if config.vis: 291 | global renderer 292 | # Bad impl with global.. 
293 | global extreme_errors, contents 294 | renderer = vis_util.SMPLRenderer( 295 | img_size=config.img_size, face_path=config.smpl_face_path) 296 | 297 | all_pairs = get_seqs() 298 | 299 | all_errors = {} 300 | all_errors_pa = {} 301 | raw_errors, raw_errors_pa = [], [] 302 | raw_pcks, raw_pcks_pa = [], [] 303 | raw_aucs, raw_aucs_pa = [], [] 304 | for itr, seq_info in enumerate(all_pairs): 305 | print('%d/%d' % (itr, len(all_pairs))) 306 | errors, errors_pa, pcks, aucs, pcks_pa, aucs_pa = evaluate_sequence(seq_info, pred_dir) 307 | mean_error = np.mean(errors) 308 | mean_error_pa = np.mean(errors_pa) 309 | med_error = np.median(errors) 310 | raw_errors.append(errors) 311 | raw_errors_pa.append(errors_pa) 312 | raw_pcks.append(pcks) 313 | raw_pcks_pa.append(pcks_pa) 314 | raw_aucs.append(aucs) 315 | raw_aucs_pa.append(aucs_pa) 316 | print('====================') 317 | print('mean error: %g, median: %g, PA mean: %g' % (mean_error, med_error, mean_error_pa)) 318 | raws = np.hstack(raw_errors) 319 | raws_pa = np.hstack(raw_errors_pa) 320 | rawspcks = np.hstack(raw_pcks) 321 | rawspcks_pa = np.hstack(raw_pcks_pa) 322 | rawsaucs = np.hstack(raw_aucs) 323 | rawsaucs_pa = np.hstack(raw_aucs_pa) 324 | print('Running average - mean: %g, median: %g' % (np.mean(raws), 325 | np.median(raws))) 326 | print('Running average - pck: %g, auc: %g' % (np.mean(rawspcks), 327 | np.median(rawsaucs))) 328 | print('Running average - PA mean: %g, median: %g' % 329 | (np.mean(raws_pa), np.median(raws_pa))) 330 | print('Running average - pck: %g, auc: %g' % 331 | (np.mean(rawspcks_pa), np.median(rawsaucs_pa))) 332 | print('====================') 333 | 334 | print('--for %s--' % config.load_path) 335 | 336 | err_pa = np.hstack(raw_errors_pa) 337 | MPJPE = np.mean(np.hstack(raw_errors)) 338 | PA_MPJPE = np.mean(err_pa) 339 | print('Average error over all joints 3d: %g, PA: %g' % (MPJPE, PA_MPJPE)) 340 | 341 | err = np.hstack(raw_errors) 342 | median = np.median(np.hstack(raw_errors)) 343 | pa_median = np.median(np.hstack(err_pa)) 344 | print( 345 | 'Percentiles 90th: %.1f 70th: %.1f 50th: %.1f 30th: %.1f 10th: %.1f' % 346 | (np.percentile(err, 90), np.percentile(err, 70), 347 | np.percentile(err, 50), np.percentile(err, 30), 348 | np.percentile(err, 10))) 349 | 350 | print('MPJPE: %.2f, PA-MPJPE: %.2f, Median: %.2f, PA-Median: %.2f' % 351 | (MPJPE, PA_MPJPE, median, pa_median)) 352 | 353 | if config.vis: 354 | global extreme_errors, contents 355 | import matplotlib.pyplot as plt 356 | # plt.ion() 357 | plt.figure(1) 358 | plt.clf() 359 | sort_inds = np.argsort(extreme_errors)[::-1] 360 | for i in xrange(30): 361 | for j in range(4): 362 | bad_ind = sort_inds[i] 363 | bad_error = extreme_errors[bad_ind] 364 | bad_img = draw_content(contents[bad_ind], config, j) 365 | plt.figure(j+1) 366 | plt.clf() 367 | plt.imshow(bad_img) 368 | plt.axis('off') 369 | plt.title('%d-th worst, mean error %.2fmm' % (i, contents[bad_ind]['err'][j])) 370 | 371 | good_ind = sort_inds[-(i+1)] 372 | good_error = extreme_errors[good_ind] 373 | good_img = draw_content(contents[good_ind], config, j) 374 | plt.figure(j+5) 375 | plt.clf() 376 | plt.imshow(good_img) 377 | plt.axis('off') 378 | plt.title('%d-th best, mean error %.2fmm' % (i, good_error)) 379 | 380 | plt.draw() 381 | plt.show() 382 | 383 | 384 | 385 | if __name__ == '__main__': 386 | config = get_config() 387 | if not config.load_path: 388 | raise Exception('Must specify a model to use to predict!') 389 | if 'model.ckpt' not in config.load_path: 390 | raise Exception('Must specify a 
model checkpoint!') 391 | main(config) 392 | -------------------------------------------------------------------------------- /src_ortho/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sets default args 3 | 4 | Note all data format is NHWC because slim resnet wants NHWC. 5 | """ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import sys 12 | from absl import flags 13 | import os.path as osp 14 | from os import makedirs 15 | from glob import glob 16 | from datetime import datetime 17 | import json 18 | 19 | import numpy as np 20 | 21 | curr_path = osp.dirname(osp.abspath(__file__)) 22 | model_dir = osp.join(curr_path, '..', 'models') 23 | if not osp.exists(model_dir): 24 | print('Fix path to models/') 25 | import ipdb 26 | ipdb.set_trace() 27 | SMPL_MODEL_PATH = osp.join(model_dir, 'neutral_smpl_with_cocoplus_reg.pkl') 28 | SMPL_FACE_PATH = osp.join(curr_path, '../src_ortho/tf_smpl', 'smpl_faces.npy') 29 | 30 | # Default pred-trained model path for the demo. 31 | PRETRAINED_MODEL = osp.join(model_dir, 'model.ckpt-55124') 32 | 33 | flags.DEFINE_string('smpl_model_path', SMPL_MODEL_PATH, 34 | 'path to the neurtral smpl model') 35 | flags.DEFINE_string('smpl_face_path', SMPL_FACE_PATH, 36 | 'path to smpl mesh faces (for easy rendering)') 37 | flags.DEFINE_string('load_path', None, 'path to trained model') 38 | flags.DEFINE_string('pretrained_model_path', PRETRAINED_MODEL,#None, # 39 | 'if not None, fine-tunes from this ckpt') 40 | flags.DEFINE_integer('batch_size', 32, 41 | 'Input image size to the network after preprocessing') 42 | 43 | # Don't change if testing: 44 | flags.DEFINE_integer('img_size', 224, 45 | 'Input image size to the network after preprocessing') 46 | flags.DEFINE_string('data_format', 'NHWC', 'Data format') 47 | flags.DEFINE_integer('num_stage', 3, '# of times to iterate regressor') 48 | flags.DEFINE_string('model_type', 'resnet_fc3_dropout', 49 | 'Specifies which network to use') 50 | flags.DEFINE_string( 51 | 'joint_type', 'cocoplus', 52 | 'cocoplus (19 keypoints) or lsp 14 keypoints, returned by SMPL') 53 | 54 | # Training settings: 55 | # TODO! If you want to train, change this to your 'tf_datasets' or specify it with the flag. 
56 | DATA_DIR = '/scratch1/williamljb/hmr_multiview/tf_datasets/' 57 | 58 | flags.DEFINE_string('data_dir', DATA_DIR, 'Where to save training models') 59 | flags.DEFINE_string('log_dir', 'logs', 'Where to save training models') 60 | flags.DEFINE_string('model_dir', None, 'Where model will be saved -- filled automatically') 61 | flags.DEFINE_integer('log_img_step', 100, 'How often to visualize img during training') 62 | flags.DEFINE_integer('epoch', 100, '# of epochs to train') 63 | 64 | flags.DEFINE_list('datasets', ['coco','h36m','mpi_inf_3dhp', 'synthetic'], 65 | 'datasets to use for training') 66 | flags.DEFINE_list('mocap_datasets', [], 67 | 'datasets to use for adversarial prior training') 68 | 69 | # Model config 70 | flags.DEFINE_boolean( 71 | 'encoder_only', True, 72 | 'if set, no adversarial prior is trained = monsters') 73 | 74 | flags.DEFINE_boolean( 75 | 'use_3d_label', True, 76 | 'Uses 3D labels if on.') 77 | 78 | # Hyper parameters: 79 | flags.DEFINE_float('e_lr', 1e-5, 'Encoder learning rate') 80 | flags.DEFINE_float('d_lr', 1e-5, 'Adversarial prior learning rate') 81 | flags.DEFINE_float('e_wd', 0.0001, 'Encoder weight decay') 82 | flags.DEFINE_float('d_wd', 0.0001, 'Adversarial prior weight decay') 83 | 84 | flags.DEFINE_float('e_loss_weight', 60, 'weight on E_kp losses') 85 | flags.DEFINE_float('d_loss_weight', 1, 'weight on discriminator') 86 | 87 | 88 | flags.DEFINE_float('e_3d_weight', 1, 'weight on E_3d') 89 | flags.DEFINE_float('e_pose_weight', 1, 'weight on E_3d') 90 | flags.DEFINE_float('e_shape_weight', 1, 'weight on E_3d') 91 | 92 | # Data augmentation 93 | flags.DEFINE_integer('trans_max', 20, 'Value to jitter translation') 94 | flags.DEFINE_float('scale_max', 1.23, 'Max value of scale jitter') 95 | flags.DEFINE_float('scale_min', 0.8, 'Min value of scale jitter') 96 | 97 | 98 | def get_config(): 99 | config = flags.FLAGS 100 | config(sys.argv) 101 | 102 | if 'resnet' in config.model_type: 103 | setattr(config, 'img_size', 224) 104 | # Slim resnet wants NHWC.. 105 | setattr(config, 'data_format', 'NHWC') 106 | 107 | return config 108 | 109 | 110 | # ----- For training ----- # 111 | 112 | 113 | def prepare_dirs(config, prefix=['HMR']): 114 | # Continue training from a load_path 115 | if config.load_path: 116 | if not osp.exists(config.load_path): 117 | print("load_path: %s doesnt exist..!!!" % config.load_path) 118 | import ipdb 119 | ipdb.set_trace() 120 | print('continuing from %s!' % config.load_path) 121 | 122 | # Check for changed training parameter: 123 | # Load prev config param path 124 | param_path = glob(osp.join(config.load_path, '*.json'))[0] 125 | 126 | with open(param_path, 'r') as fp: 127 | prev_config = json.load(fp) 128 | dict_here = config.__dict__ 129 | ignore_keys = ['load_path', 'log_img_step', 'pretrained_model_path'] 130 | diff_keys = [ 131 | k for k in dict_here 132 | if k not in ignore_keys and k in prev_config.keys() 133 | and prev_config[k] != dict_here[k] 134 | ] 135 | 136 | for k in diff_keys: 137 | if k == 'load_path' or k == 'log_img_step': 138 | continue 139 | if prev_config[k] is None and dict_here[k] is not None: 140 | print("%s is different!! before: None after: %g" % 141 | (k, dict_here[k])) 142 | elif prev_config[k] is not None and dict_here[k] is None: 143 | print("%s is different!! before: %g after: None" % 144 | (k, prev_config[k])) 145 | else: 146 | print("%s is different!! 
before: " % k) 147 | print(prev_config[k]) 148 | print("now:") 149 | print(dict_here[k]) 150 | 151 | if len(diff_keys) > 0: 152 | print("really continue??") 153 | import ipdb 154 | ipdb.set_trace() 155 | 156 | config.model_dir = config.load_path 157 | 158 | else: 159 | postfix = [] 160 | 161 | # If config.dataset is not the same as default, add that to name. 162 | default_dataset = [ 163 | 'lsp', 'lsp_ext', 'mpii', 'h36m', 'coco', 'mpi_inf_3dhp' 164 | ] 165 | default_mocap = ['CMU', 'H3.6', 'jointLim'] 166 | 167 | if sorted(config.datasets) != sorted(default_dataset): 168 | has_all_default = np.all( 169 | [name in config.datasets for name in default_dataset]) 170 | if has_all_default: 171 | new_names = [ 172 | name for name in sorted(config.datasets) 173 | if name not in default_dataset 174 | ] 175 | postfix.append('default+' + '-'.join(sorted(new_names))) 176 | else: 177 | postfix.append('-'.join(sorted(config.datasets))) 178 | if sorted(config.mocap_datasets) != sorted(default_mocap): 179 | postfix.append('-'.join(config.mocap_datasets)) 180 | 181 | postfix.append(config.model_type) 182 | 183 | if config.num_stage != 4: 184 | prefix += ["T%d" % config.num_stage] 185 | 186 | postfix.append("Elr%1.e" % config.e_lr) 187 | 188 | if config.e_loss_weight != 1: 189 | postfix.append("kp-weight%g" % config.e_loss_weight) 190 | 191 | if not config.encoder_only: 192 | postfix.append("Dlr%1.e" % config.d_lr) 193 | if config.d_loss_weight != 1: 194 | postfix.append("d-weight%g" % config.d_loss_weight) 195 | 196 | if config.use_3d_label: 197 | print('Using 3D labels!!') 198 | prefix.append("3DSUP") 199 | if config.e_3d_weight != 1: 200 | postfix.append("3dsup-weight%g" % config.e_3d_weight) 201 | 202 | # Data: 203 | # Jitter amount: 204 | if config.trans_max != 20: 205 | postfix.append("transmax-%d" % config.trans_max) 206 | if config.scale_max != 1.23: 207 | postfix.append("scmax_%.3g" % config.scale_max) 208 | if config.scale_min != 0.8: 209 | postfix.append("scmin-%.3g" % config.scale_min) 210 | 211 | prefix = '_'.join(prefix) 212 | postfix = '_'.join(postfix) 213 | 214 | time_str = datetime.now().strftime("%b%d_%H%M") 215 | 216 | save_name = "%s_%s_%s" % (prefix, postfix, time_str) 217 | config.model_dir = osp.join(config.log_dir, save_name) 218 | 219 | for path in [config.log_dir, config.model_dir]: 220 | if not osp.exists(path): 221 | print('making %s' % path) 222 | makedirs(path) 223 | 224 | 225 | def save_config(config): 226 | param_path = osp.join(config.model_dir, "params.json") 227 | 228 | print("[*] MODEL dir: %s" % config.model_dir) 229 | print("[*] PARAM path: %s" % param_path) 230 | 231 | config_dict = {} 232 | for k in dir(config): 233 | config_dict[k] = config.__getattr__(k) 234 | 235 | with open(param_path, 'w') as fp: 236 | json.dump(config_dict, fp, indent=4, sort_keys=True) 237 | -------------------------------------------------------------------------------- /src_ortho/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/config.pyc -------------------------------------------------------------------------------- /src_ortho/data_loader.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/data_loader.pyc -------------------------------------------------------------------------------- 
/src_ortho/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/__init__.py -------------------------------------------------------------------------------- /src_ortho/datasets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/__init__.pyc -------------------------------------------------------------------------------- /src_ortho/datasets/coco_to_tfrecords.py: -------------------------------------------------------------------------------- 1 | """ Convert Coco to TFRecords """ 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from os.path import join, exists 8 | from os import makedirs 9 | 10 | import numpy as np 11 | import tensorflow as tf 12 | 13 | from .pycocotools.coco import COCO 14 | 15 | from .common import convert_to_example, ImageCoder, resize_img 16 | 17 | tf.app.flags.DEFINE_string('data_directory', '/scratch1/storage/coco/', 18 | 'data directory: top of coco') 19 | tf.app.flags.DEFINE_string('output_directory', 20 | '/scratch1/projects/tf_datasets/coco_wmask/', 21 | 'Output data directory') 22 | 23 | tf.app.flags.DEFINE_integer('train_shards', 500, 24 | 'Number of shards in training TFRecord files.') 25 | tf.app.flags.DEFINE_integer('validation_shards', 500, 26 | 'Number of shards in validation TFRecord files.') 27 | FLAGS = tf.app.flags.FLAGS 28 | 29 | joint_names = [ 30 | 'R Ankle', 'R Knee', 'R Hip', 'L Hip', 'L Knee', 'L Ankle', 'R Wrist', 31 | 'R Elbow', 'R Shoulder', 'L Shoulder', 'L Elbow', 'L Wrist', 'Neck', 32 | 'Head', 'Nose', 'L Eye', 'R Eye', 'L Ear', 'R Ear' 33 | ] 34 | 35 | 36 | def convert_coco2universal(kp): 37 | """ 38 | Mapping from COCO joints (kp: 17 x 3) to 39 | Universal 19 joints (14 lsp)+ (5 coco faces). 40 | 41 | Permutes and adds extra 0 two rows for missing head and neck 42 | returns: 19 x 3 43 | """ 44 | 45 | UNIVERSAL_BODIES = [ 46 | 16, # R ankle 47 | 14, # R knee 48 | 12, # R hip 49 | 11, # L hip 50 | 13, # L knee 51 | 15, # L ankle 52 | 10, # R Wrist 53 | 8, # R Elbow 54 | 6, # R shoulder 55 | 5, # L shoulder 56 | 7, # L Elbow 57 | 9, # L Wrist 58 | ] 59 | UNIVERSAL_HEADS = range(5) 60 | new_kp = np.vstack((kp[UNIVERSAL_BODIES, :], np.zeros((2, 3)), 61 | kp[UNIVERSAL_HEADS, :])) 62 | return new_kp 63 | 64 | 65 | def get_anns_details(anns, coco, min_vis=5, min_max_height=60): 66 | """ 67 | anns is the list of annotations 68 | coco is the cocoAPI 69 | 70 | extracts the boundingbox (using the mask) 71 | and the keypoints for each person. 
72 | 73 | Ignores the person if there is no or < min_vis keypoints 74 | Ignores the person if max bbox length is <= min_max_height 75 | """ 76 | points_other_than_faceshoulder = [ 77 | 16, # R ankle 78 | 14, # R knee 79 | 12, # R hip 80 | 11, # L hip 81 | 13, # L knee 82 | 15, # L ankle 83 | 10, # R Wrist 84 | 8, # R Elbow 85 | 7, # L Elbow 86 | 9, # L Wrist 87 | ] 88 | filtered_anns = [] 89 | kps = [] 90 | centers, bboxes = [], [] 91 | masks = [] 92 | for ann in anns: 93 | if 'keypoints' not in ann or type(ann['keypoints']) != list: 94 | # Ignore those without keypoints 95 | continue 96 | if ann['num_keypoints'] == 0: 97 | continue 98 | 99 | if 'segmentation' in ann: 100 | # Use the mask to compute center 101 | mask = coco.annToMask(ann) 102 | # import ipdb; ipdb.set_trace() 103 | # import matplotlib.pyplot as plt 104 | # plt.ion() 105 | # plt.figure(1) 106 | # plt.imshow(mask) 107 | # plt.pause(1e-3) 108 | # this is N x 2 (in [x, y]) of fgpts 109 | fg_pts = np.transpose(np.nonzero(mask))[:, ::-1] 110 | min_pt = np.min(fg_pts, axis=0) 111 | max_pt = np.max(fg_pts, axis=0) 112 | bbox = [min_pt, max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 113 | center = (min_pt + max_pt) / 2. 114 | else: 115 | print('No segmentation!') 116 | import ipdb 117 | ipdb.set_trace() 118 | 119 | kp_raw = np.array(ann['keypoints']) 120 | x = kp_raw[0::3] 121 | y = kp_raw[1::3] 122 | v = kp_raw[2::3] 123 | # At least min_vis many visible (not occluded) kps. 124 | if sum(v == 2) >= min_vis and max(bbox[2:]) > min_max_height: 125 | # If only face & shoulder visible, skip. 126 | if np.all(v[points_other_than_faceshoulder] == 0): 127 | continue 128 | kp = np.vstack([x, y, v]).T 129 | kps.append(kp) 130 | filtered_anns.append(ann) 131 | centers.append(center) 132 | bboxes.append(bbox) 133 | masks.append(mask) 134 | 135 | return filtered_anns, kps, bboxes, centers, masks 136 | 137 | 138 | def parse_people(kps, centers, masks): 139 | ''' 140 | Parses people i.e. figures out scale from annotation. 141 | Input: 142 | 143 | Returns: 144 | people - list of tuple (kp, img_scale, obj_pos) in this image. 145 | ''' 146 | # No single persons in this image. 147 | if len(kps) == 0: 148 | return [] 149 | 150 | # Read each human: 151 | people = [] 152 | 153 | for kp, center, mask in zip(kps, centers, masks): 154 | # Universal joints! 155 | joints = convert_coco2universal(kp).T 156 | # Scale person to be roughly 150x height 157 | visible = joints[2, :].astype(bool) 158 | min_pt = np.min(joints[:2, visible], axis=1) 159 | max_pt = np.max(joints[:2, visible], axis=1) 160 | person_height = np.linalg.norm(max_pt - min_pt) 161 | 162 | R_ank = joint_names.index('R Ankle') 163 | L_ank = joint_names.index('L Ankle') 164 | 165 | # If ankles are visible 166 | if visible[R_ank] or visible[L_ank]: 167 | my_scale = 150. / person_height 168 | else: 169 | L_should = joint_names.index('L Shoulder') 170 | L_hip = joint_names.index('L Hip') 171 | R_should = joint_names.index('R Shoulder') 172 | R_hip = joint_names.index('R Hip') 173 | # Torso points left should, right shold, right hip, left hip 174 | # torso_points = joints[:, [9, 8, 2, 3]] 175 | torso_heights = [] 176 | if visible[L_should] and visible[L_hip]: 177 | torso_heights.append( 178 | np.linalg.norm(joints[:2, L_should] - joints[:2, L_hip])) 179 | if visible[R_should] and visible[R_hip]: 180 | torso_heights.append( 181 | np.linalg.norm(joints[:2, R_should] - joints[:2, R_hip])) 182 | # Make torso 75px 183 | if len(torso_heights) > 0: 184 | my_scale = 75. 
/ np.mean(torso_heights) 185 | else: # No torso! 186 | body_inds = np.array([0, 1, 2, 3, 4, 5, 6, 7, 10, 11]) 187 | if np.all(visible[body_inds] == 0): 188 | print('Face only! skip..') 189 | continue 190 | else: 191 | my_scale = 50. / person_height 192 | 193 | people.append((joints, my_scale, center, mask)) 194 | 195 | return people 196 | 197 | 198 | def add_to_tfrecord(coco, img_id, img_dir, coder, writer, is_train): 199 | """ 200 | Add each "single person" in this image. 201 | coco - coco API 202 | 203 | Returns: 204 | The number of people added. 205 | """ 206 | # Get annotation id for this guy 207 | # Cat ids is [1] for human.. 208 | ann_id = coco.getAnnIds(imgIds=img_id, catIds=[1], iscrowd=False) 209 | anns = coco.loadAnns(ann_id) 210 | # coco.showAnns(anns) 211 | filtered_anns, kps, bboxes, centers, masks = get_anns_details( 212 | anns, coco, min_vis=6, min_max_height=60) 213 | 214 | # Figure out the scale and pack each one in a tuple 215 | people = parse_people(kps, centers, masks) 216 | 217 | if len(people) == 0: 218 | # print('No single persons in img %d' % img_id) 219 | return 0 220 | 221 | # Add each people to tf record 222 | img_data = coco.loadImgs(img_id)[0] 223 | image_path = join(img_dir, img_data['file_name']) 224 | with tf.gfile.FastGFile(image_path, 'rb') as f: 225 | image_data = f.read() 226 | 227 | image = coder.decode_jpeg(image_data) 228 | 229 | for joints, scale, pos, mask in people: 230 | # Scale image: 231 | image_scaled, scale_factors = resize_img(image, scale) 232 | height, width = image_scaled.shape[:2] 233 | joints_scaled = np.copy(joints) 234 | joints_scaled[0, :] *= scale_factors[0] 235 | joints_scaled[1, :] *= scale_factors[1] 236 | # center = pos * scale_factors 237 | 238 | visible = joints[2, :].astype(bool) 239 | min_pt = np.min(joints_scaled[:2, visible], axis=1) 240 | max_pt = np.max(joints_scaled[:2, visible], axis=1) 241 | center = (min_pt + max_pt) / 2. 242 | 243 | ## Crop 400x400 around this image.. 244 | margin = 200 245 | start_pt = np.maximum(center - margin, 0).astype(int) 246 | end_pt = (center + margin).astype(int) 247 | end_pt[0] = min(end_pt[0], width) 248 | end_pt[1] = min(end_pt[1], height) 249 | image_scaled = image_scaled[start_pt[1]:end_pt[1], start_pt[0]:end_pt[ 250 | 0], :] 251 | # Update others oo. 
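        # Shift the keypoints and person center into the coordinate frame of the 400x400 crop.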
252 | joints_scaled[0, :] -= start_pt[0] 253 | joints_scaled[1, :] -= start_pt[1] 254 | center -= start_pt 255 | height, width = image_scaled.shape[:2] 256 | 257 | # Vis: 258 | """ 259 | import matplotlib.pyplot as plt 260 | plt.ion() 261 | plt.clf() 262 | fig = plt.figure(1) 263 | ax = fig.add_subplot(121) 264 | image_with_skel = draw_skeleton(image, joints[:2, :], vis=visible, radius=(np.mean(image.shape[:2]) * 0.01).astype(int)) 265 | ax.imshow(image_with_skel) 266 | ax.axis('off') 267 | # ax.imshow(image) 268 | # ax.scatter(joints[0, visible], joints[1, visible]) 269 | # ax.scatter(joints[0, ~visible], joints[1, ~visible], color='green') 270 | ax.scatter(pos[0], pos[1], color='red') 271 | ax = fig.add_subplot(122) 272 | image_with_skel_scaled = draw_skeleton(image_scaled, joints_scaled[:2, :], vis=visible, radius=max(4, (np.mean(image_scaled.shape[:2]) * 0.01).astype(int))) 273 | ax.imshow(image_with_skel_scaled) 274 | ax.scatter(center[0], center[1], color='red') 275 | # ax.imshow(image_scaled) 276 | # ax.scatter(joints_scaled[0, visible], joints_scaled[1, visible]) 277 | # ax.scatter(pos_scaled[0], pos_scaled[1], color='red') 278 | ax.axis('on') 279 | plt.draw() 280 | plt.pause(0.01) 281 | """ 282 | 283 | # Encode image: 284 | image_data_scaled = coder.encode_jpeg(image_scaled) 285 | example = convert_to_example(image_data_scaled, image_path, height, 286 | width, joints_scaled, center) 287 | writer.write(example.SerializeToString()) 288 | 289 | # Finally return how many were written. 290 | return len(people) 291 | 292 | 293 | def process_coco(data_dir, out_dir, num_shards, is_train=True): 294 | 295 | if is_train: 296 | data_type = 'train2014' 297 | out_path = join(out_dir, 'train_%04d_wmeta.tfrecord') 298 | else: 299 | data_type = 'val2014' 300 | out_path = join(out_dir, 'val_%04d_wmeta.tfrecord') 301 | 302 | anno_file = join(data_dir, 303 | 'annotations/person_keypoints_%s.json' % data_type) 304 | img_dir = join(data_dir, 'images', data_type) 305 | # initialize COCO api for person keypoints annotations 306 | coco = COCO(anno_file) 307 | catIds = coco.getCatIds(catNms=['person']) 308 | img_inds = coco.getImgIds(catIds=catIds) 309 | # Only run on 'single person's 310 | coder = ImageCoder() 311 | 312 | i = 0 313 | # Count on shards 314 | fidx = 0 315 | num_ppl = 0 316 | total_num_ppl = 0 317 | while i < len(img_inds): 318 | tf_filename = out_path % fidx 319 | print('Starting tfrecord file %s' % tf_filename) 320 | with tf.python_io.TFRecordWriter(tf_filename) as writer: 321 | # Count on total ppl in each shard 322 | num_ppl = 0 323 | while i < len(img_inds) and num_ppl < num_shards: 324 | if i % 100 == 0: 325 | print('Reading img %d/%d' % (i, len(img_inds))) 326 | num_ppl += add_to_tfrecord(coco, img_inds[i], img_dir, coder, 327 | writer, is_train) 328 | i += 1 329 | total_num_ppl += num_ppl 330 | 331 | fidx += 1 332 | 333 | print('Made %d shards, with total # of people: %d' % 334 | (fidx - 1, total_num_ppl)) 335 | 336 | 337 | def main(unused_argv): 338 | print('Saving results to %s' % FLAGS.output_directory) 339 | 340 | if not exists(FLAGS.output_directory): 341 | makedirs(FLAGS.output_directory) 342 | process_coco( 343 | FLAGS.data_directory, 344 | FLAGS.output_directory, 345 | FLAGS.train_shards, 346 | is_train=True) 347 | # do_valid 348 | # _process_coco(FLAGS.data_directory, FLAGS.output_directory, FLAGS.validation_shards, is_train=False) 349 | 350 | 351 | if __name__ == '__main__': 352 | tf.app.run() 353 | 
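A small sanity-check sketch (not in the repository) for the `convert_coco2universal` mapping defined above. It assumes the module can be imported as part of the `src_ortho.datasets` package and uses random values in place of real COCO keypoints.

```
from __future__ import print_function
import numpy as np
from src_ortho.datasets.coco_to_tfrecords import convert_coco2universal

kp_coco = np.random.rand(17, 3)            # COCO order: 17 joints of (x, y, visibility)
kp_univ = convert_coco2universal(kp_coco)

print(kp_univ.shape)                       # (19, 3): 14 LSP-style body joints + 5 face points
print(np.all(kp_univ[12:14] == 0))         # True: neck and head are unannotated in COCO, so zero-filled
```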
-------------------------------------------------------------------------------- /src_ortho/datasets/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helpers for tfrecord conversion. 3 | """ 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import tensorflow as tf 10 | import numpy as np 11 | 12 | 13 | class ImageCoder(object): 14 | """Helper class that provides TensorFlow image coding utilities. 15 | Taken from 16 | https://github.com/tensorflow/models/blob/master/inception/inception/data/build_image_data.py 17 | """ 18 | 19 | def __init__(self): 20 | # Create a single Session to run all image coding calls. 21 | self._sess = tf.Session() 22 | 23 | # Initializes function that converts PNG to JPEG data. 24 | self._png_data = tf.placeholder(dtype=tf.string) 25 | image = tf.image.decode_png(self._png_data, channels=3) 26 | self._png_to_jpeg = tf.image.encode_jpeg( 27 | image, format='rgb', quality=100) 28 | 29 | # Initializes function that decodes RGB JPEG data. 30 | self._decode_jpeg_data = tf.placeholder(dtype=tf.string) 31 | self._decode_jpeg = tf.image.decode_jpeg( 32 | self._decode_jpeg_data, channels=3) 33 | 34 | self._encode_jpeg_data = tf.placeholder(dtype=tf.uint8) 35 | self._encode_jpeg = tf.image.encode_jpeg( 36 | self._encode_jpeg_data, format='rgb') 37 | 38 | self._decode_png_data = tf.placeholder(dtype=tf.string) 39 | self._decode_png = tf.image.decode_png( 40 | self._decode_png_data, channels=3) 41 | 42 | self._encode_png_data = tf.placeholder(dtype=tf.uint8) 43 | self._encode_png = tf.image.encode_png(self._encode_png_data) 44 | 45 | def png_to_jpeg(self, image_data): 46 | return self._sess.run( 47 | self._png_to_jpeg, feed_dict={ 48 | self._png_data: image_data 49 | }) 50 | 51 | def decode_jpeg(self, image_data): 52 | image = self._sess.run( 53 | self._decode_jpeg, feed_dict={ 54 | self._decode_jpeg_data: image_data 55 | }) 56 | assert len(image.shape) == 3 57 | assert image.shape[2] == 3 58 | return image 59 | 60 | def encode_jpeg(self, image): 61 | image_data = self._sess.run( 62 | self._encode_jpeg, feed_dict={ 63 | self._encode_jpeg_data: image 64 | }) 65 | return image_data 66 | 67 | def encode_png(self, image): 68 | image_data = self._sess.run( 69 | self._encode_png, feed_dict={ 70 | self._encode_png_data: image 71 | }) 72 | return image_data 73 | 74 | def decode_png(self, image_data): 75 | image = self._sess.run( 76 | self._decode_png, feed_dict={ 77 | self._decode_png_data: image_data 78 | }) 79 | assert len(image.shape) == 3 80 | assert image.shape[2] == 3 81 | return image 82 | 83 | 84 | def int64_feature(value): 85 | """Wrapper for inserting int64 features into Example proto.""" 86 | if not isinstance(value, list) and not isinstance(value, np.ndarray): 87 | value = [value] 88 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 89 | 90 | 91 | def float_feature(value): 92 | """Wrapper for inserting float features into Example proto.""" 93 | if not isinstance(value, list) and not isinstance(value, np.ndarray): 94 | value = [value] 95 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 96 | 97 | 98 | def bytes_feature(value): 99 | """Wrapper for inserting bytes features into Example proto.""" 100 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 101 | 102 | 103 | def convert_to_example(image_data, image_path, height, width, label, center): 104 | """Build an Example proto for an 
image example. 105 | Args: 106 | image_data: string, JPEG encoding of RGB image; 107 | image_path: string, path to this image file 108 | labels: 3 x 14 joint location + visibility --> This could be 3 x 19 109 | height, width: integers, image shapes in pixels. 110 | center: 2 x 1 center of the tight bbox 111 | Returns: 112 | Example proto 113 | """ 114 | from os.path import basename 115 | 116 | image_format = 'JPEG' 117 | add_face = False 118 | if label.shape[1] == 19: 119 | add_face = True 120 | # Split and save facepts on it's own. 121 | face_pts = label[:, 14:] 122 | label = label[:, :14] 123 | has_3d=np.zeros(1) 124 | feat_dict = { 125 | 'image/height': int64_feature(height), 126 | 'image/width': int64_feature(width), 127 | 'image/center': int64_feature(center.astype(np.int)), 128 | 'image/x': float_feature(label[0, :].astype(np.float)), 129 | 'image/y': float_feature(label[1, :].astype(np.float)), 130 | 'image/visibility': int64_feature(label[2, :].astype(np.int)), 131 | 'image/format': bytes_feature([tf.compat.as_bytes(image_format)]), 132 | 'image/filename': bytes_feature( 133 | [tf.compat.as_bytes(basename(image_path))]), 134 | 'image/encoded': bytes_feature([tf.compat.as_bytes(image_data)]), 135 | 'meta/has_3d': int64_feature(has_3d.astype(np.int)), 136 | 'meta/has_3djoint': int64_feature(0), 137 | } 138 | if add_face: 139 | # 3 x 5 140 | feat_dict.update({ 141 | 'image/face_pts': 142 | float_feature(face_pts.ravel().astype(np.float)) 143 | }) 144 | 145 | example = tf.train.Example(features=tf.train.Features(feature=feat_dict)) 146 | 147 | return example 148 | 149 | 150 | def convert_to_example_wmosh(image_data, image_path, height, width, label, 151 | center, gt3d, pose, shape, scale_factors, 152 | start_pt, cam): 153 | """Build an Example proto for an image example. 154 | Args: 155 | image_data: 4*string, JPEG encoding of RGB image; 156 | image_path: 4*string, path to this image file 157 | labels: 4*3 x 14 joint location + visibility 158 | height, width: 4*integers, image shapes in pixels. 159 | center:4*2 x 1 center of the tight bbox 160 | gt3d: 4*14x3 3D joint locations 161 | scale_factors: 4*2 x 1, scale factor used to scale image. 162 | start_pt: 4*the left corner used to crop the _scaled_ image to 300x300 163 | cam: (4,3,), [f, px, py] intrinsic camera parameters. 164 | Returns: 165 | Example proto 166 | """ 167 | from os.path import basename 168 | image_format = 'JPEG' 169 | label = np.array(label) 170 | if label.shape[1] != 3: 171 | label = label.T 172 | if label.shape[2] > 14: 173 | print('This shouldnt be happening') 174 | import ipdb 175 | ipdb.set_trace() 176 | num_cam = label.shape[0] 177 | # has_3d = [has_joint, has_smpl] 178 | has_3d = np.ones(2) 179 | if pose[0] is None: 180 | has_3d[1] = 0 181 | # Use -1 to save. 
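        # The -1 placeholder arrays keep the Example feature sizes fixed; has_3d[1] = 0 above marks them as invalid.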
182 | pose = -np.ones((num_cam, 72)) 183 | shape = -np.ones((num_cam, 10)) 184 | if gt3d[0][0][0] == -1: 185 | gt3d = -np.ones((num_cam, 14, 3)) 186 | has_3d[0] = 0 187 | 188 | example = tf.train.Example( 189 | features=tf.train.Features(feature={ 190 | 'image/height': 191 | int64_feature(height), 192 | 'image/width': 193 | int64_feature(width), 194 | 'image/center': 195 | int64_feature(np.array(center).ravel().astype(np.int)), 196 | 'image/x': 197 | float_feature(label[:, 0, :].ravel().astype(np.float)), 198 | 'image/y': 199 | float_feature(label[:, 1, :].ravel().astype(np.float)), 200 | 'image/visibility': 201 | int64_feature(label[:, 2, :].ravel().astype(np.int)), 202 | 'image/format': 203 | bytes_feature([tf.compat.as_bytes(image_format) for _ in image_path]), 204 | 'image/filename': 205 | bytes_feature([tf.compat.as_bytes(basename(path)) for path in image_path]), 206 | 'image/encoded': 207 | bytes_feature([tf.compat.as_bytes(data) for data in image_data]), 208 | 'mosh/pose': 209 | float_feature(np.array(pose).ravel().astype(np.float)), 210 | 'mosh/shape': 211 | float_feature(np.array(shape).ravel().astype(np.float)), 212 | 'mosh/gt3d': 213 | float_feature(gt3d.ravel().astype(np.float)), 214 | 'meta/scale_factors': 215 | float_feature(np.array(scale_factors).ravel().astype(np.float)), 216 | 'meta/crop_pt': 217 | int64_feature(np.array(start_pt).ravel().astype(np.int)), 218 | 'meta/has_3d': 219 | int64_feature(has_3d.astype(np.int)), 220 | 'image/cam': 221 | float_feature(np.array(cam).ravel().astype(np.float)), 222 | })) 223 | 224 | return example 225 | 226 | 227 | def resize_img(img, scale_factor): 228 | import cv2 229 | import numpy as np 230 | new_size = (np.floor(np.array(img.shape[0:2]) * scale_factor)).astype(int) 231 | new_img = cv2.resize(img, (new_size[1], new_size[0])) 232 | # This is scale factor of [height, width] i.e. [y, x] 233 | actual_factor = [ 234 | new_size[0] / float(img.shape[0]), new_size[1] / float(img.shape[1]) 235 | ] 236 | return new_img, actual_factor 237 | 238 | 239 | def read_images_from_tfrecords(tf_path, num_views, img_size=224, sess=None): 240 | """ 241 | Returns image, kp, and gt3d from the tf_paths 242 | 243 | This returns a preprocessed image, cropped around img_size. 244 | """ 245 | from time import time 246 | from os.path import exists 247 | if not exists(tf_path): 248 | print('%s doesnt exist!' 
% tf_path) 249 | exit(1) 250 | 251 | if sess is None: 252 | sess = tf.Session() 253 | 254 | t0 = time() 255 | all_images, all_kps, all_gt3ds = [], [], [] 256 | all_poses, all_shapes = [], [] 257 | 258 | itr = 0 259 | 260 | # Decode op graph 261 | image_data_pl = tf.placeholder(dtype=tf.string) 262 | decode_op = tf.image.decode_jpeg(image_data_pl) 263 | 264 | for serialized_ex in tf.python_io.tf_record_iterator(tf_path): 265 | example = tf.train.Example() 266 | example.ParseFromString(serialized_ex) 267 | crops, kp_finals, gt3ds = [], [], [] 268 | poses, shapes = [], [] 269 | for i in range(num_views): 270 | image_data = example.features.feature['image/encoded'].bytes_list.value[i] 271 | image = sess.run(decode_op, feed_dict={image_data_pl: image_data}) 272 | 273 | x = example.features.feature['image/x'].float_list.value[14*i:14*(i+1)] 274 | y = example.features.feature['image/y'].float_list.value[14*i:14*(i+1)] 275 | vis = example.features.feature['image/visibility'].int64_list.value[14*i:14*(i+1)] 276 | center = example.features.feature['image/center'].int64_list.value[2*i:2*(i+1)] 277 | 278 | x = np.array(x) 279 | y = np.array(y) 280 | vis = np.array(vis, dtype='bool') 281 | center = np.array(center) 282 | 283 | # Crop img_size. 284 | # Pad in case. 285 | margin = int(img_size/2) 286 | image_pad = np.pad(image, ((200,200), (200,200), (0,0)), mode='edge') 287 | 288 | # figure out starting point 289 | start_pt = center - margin + 200 290 | end_pt = start_pt + 2*margin 291 | 292 | x_crop = x - start_pt[0] + 200 293 | y_crop = y - start_pt[1] + 200 294 | kp_crop = np.vstack([x_crop, y_crop]) 295 | kp_final = 2 * (kp_crop / img_size) - 1 296 | kp_final = np.vstack((vis * kp_final, vis)).T 297 | # crop: 298 | crop = image_pad[start_pt[1]:end_pt[1], start_pt[0]:end_pt[0], :] 299 | if crop.shape != (224,224,3): 300 | print("shape=",crop.shape) 301 | crop = np.zeros((224, 224, 3), np.float32) 302 | 303 | # Normalize image to [-1, 1] 304 | crop = 2 * ((crop / 255.) - 0.5) 305 | 306 | # Note: This says mosh but gt3d is the gt H3.6M joints & not from mosh. 
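            # Each view's slice of 'mosh/gt3d' is 14 joints * 3 coords = 42 floats.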
307 | gt3d = example.features.feature['mosh/gt3d'].float_list.value[14*3*i:14*3*(i+1)] 308 | gt3d = np.array(gt3d).reshape(14, 3) 309 | 310 | pose = example.features.feature['mosh/pose'].float_list.value[72*i:72*(i+1)] 311 | shape = example.features.feature['mosh/shape'].float_list.value[10*i:10*(i+1)] 312 | 313 | poses.append(pose) 314 | shapes.append(shape) 315 | crops.append(crop) 316 | kp_finals.append(kp_final) 317 | gt3ds.append(gt3d) 318 | 319 | all_poses.append(poses) 320 | all_shapes.append(shapes) 321 | all_images.append(crops) 322 | all_kps.append(kp_finals) 323 | all_gt3ds.append(gt3ds) 324 | 325 | itr += 1 326 | 327 | images = np.array(all_images) 328 | print(images.shape) 329 | kps = np.array(all_kps) 330 | gt3ds = np.array(all_gt3ds) 331 | # B * view * size 332 | 333 | print('Read %d images, %g secs' % (images.shape[0], time()-t0)) 334 | 335 | return images, kps, gt3ds, np.array(all_poses), np.array(all_shapes) 336 | -------------------------------------------------------------------------------- /src_ortho/datasets/common.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/common.pyc -------------------------------------------------------------------------------- /src_ortho/datasets/convert_datasets.sh: -------------------------------------------------------------------------------- 1 | # Change to your 2 | OUT_DIR='/Users/kanazawa/projects/tf_datasets/' 3 | 4 | # Change to where each dataset directory is: 5 | LSP_DIR='/scratch1/storage/human_datasets/lsp_dataset/' 6 | LSP_EXT_DIR='/scratch1/storage/human_datasets/lsp_extended/' 7 | MPII_DIR='/scratch1/storage/human_datasets/mpii/' 8 | 9 | # LSP: 10 | python lsp_to_tfrecords.py --img_directory $LSP_DIR --output_directory $OUT_DIR/lsp 11 | # LSP-extended: 12 | python lsp_to_tfrecords.py --img_directory $LSP_EXT_DIR --output_directory $OUT_DIR/lsp_ext 13 | 14 | # MPII: 15 | python mpii_to_tfrecords.py --img_directory $MPII_DIR --output_directory $OUT_DIR/mpii 16 | -------------------------------------------------------------------------------- /src_ortho/datasets/lsp_to_tfrecords.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert LSP/LSP extended to TFRecords. 3 | In LSP, the first 1000 is training and the last 1000 is test/validation. 4 | All of LSP extended is training. 
5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from os import makedirs 11 | from os.path import join, exists 12 | from glob import glob 13 | 14 | import numpy as np 15 | 16 | import tensorflow as tf 17 | 18 | from .common import convert_to_example, ImageCoder 19 | 20 | tf.app.flags.DEFINE_string('img_directory', 21 | '/scratch1/storage/human_datasets/lsp_dataset', 22 | 'image data directory') 23 | tf.app.flags.DEFINE_string( 24 | 'output_directory', '/Users/kanazawa/projects/datasets/tf_datasets/lsp/', 25 | 'Output data directory') 26 | 27 | tf.app.flags.DEFINE_integer('train_shards', 500, 28 | 'Number of shards in training TFRecord files.') 29 | tf.app.flags.DEFINE_integer('validation_shards', 500, 30 | 'Number of shards in validation TFRecord files.') 31 | 32 | FLAGS = tf.app.flags.FLAGS 33 | 34 | 35 | def _add_to_tfrecord(image_path, label, coder, writer, is_lsp_ext=False): 36 | with tf.gfile.FastGFile(image_path, 'rb') as f: 37 | image_data = f.read() 38 | 39 | image = coder.decode_jpeg(image_data) 40 | height, width = image.shape[:2] 41 | assert image.shape[2] == 3 42 | 43 | # LSP 3-D dim, 0 means visible 1 means invisible. 44 | # But in LSP-ext, 0 means invis, 1 means visible 45 | # Negate this 46 | if is_lsp_ext: 47 | visible = label[2, :].astype(bool) 48 | else: 49 | visible = np.logical_not(label[2, :]) 50 | label[2, :] = visible.astype(label.dtype) 51 | min_pt = np.min(label[:2, visible], axis=1) 52 | max_pt = np.max(label[:2, visible], axis=1) 53 | center = (min_pt + max_pt) / 2. 54 | """ 55 | import matplotlib.pyplot as plt 56 | plt.ion() 57 | plt.clf() 58 | fig = plt.figure(1) 59 | ax = fig.add_subplot(111) 60 | plt.imshow(image) 61 | plt.scatter(label[0, visible], label[1, visible]) 62 | plt.scatter(center[0], center[1]) 63 | # bwidth, bheight = max_pt - min_pt + 1 64 | # rect = plt.Rectangle(min_pt, bwidth, bheight, fc='None', ec='green') 65 | # ax.add_patch(rect) 66 | import ipdb; ipdb.set_trace() 67 | """ 68 | 69 | example = convert_to_example(image_data, image_path, height, width, label, 70 | center) 71 | 72 | writer.write(example.SerializeToString()) 73 | 74 | 75 | def package(img_paths, labels, out_path, num_shards): 76 | """ 77 | packages the images and labels into multiple tfrecords. 78 | """ 79 | is_lsp_ext = True if len(img_paths) == 10000 else False 80 | coder = ImageCoder() 81 | 82 | i = 0 83 | fidx = 0 84 | while i < len(img_paths): 85 | # Open new TFRecord file. 86 | tf_filename = out_path % fidx 87 | print('Starting tfrecord file %s' % tf_filename) 88 | with tf.python_io.TFRecordWriter(tf_filename) as writer: 89 | j = 0 90 | while i < len(img_paths) and j < num_shards: 91 | if i % 100 == 0: 92 | print('Converting image %d/%d' % (i, len(img_paths))) 93 | _add_to_tfrecord( 94 | img_paths[i], 95 | labels[:, :, i], 96 | coder, 97 | writer, 98 | is_lsp_ext=is_lsp_ext) 99 | i += 1 100 | j += 1 101 | 102 | fidx += 1 103 | 104 | 105 | def load_mat(fname): 106 | import scipy.io as sio 107 | res = sio.loadmat(fname) 108 | # this is 3 x 14 x 2000 109 | return res['joints'] 110 | 111 | 112 | def process_lsp(img_dir, out_dir, num_shards_train, num_shards_test): 113 | """Process a complete data set and save it as a TFRecord. 114 | LSP has 2000 images, first 1000 is train, last 1000 is test. 115 | 116 | Args: 117 | img_dir: string, root path to the data set. 118 | num_shards: integer number of shards for this data set. 
119 | """ 120 | # Load labels 3 x 14 x N 121 | labels = load_mat(join(img_dir, 'joints.mat')) 122 | if labels.shape[0] != 3: 123 | labels = np.transpose(labels, (1, 0, 2)) 124 | 125 | all_images = sorted([f for f in glob(join(img_dir, 'images/*.jpg'))]) 126 | 127 | if len(all_images) == 10000: 128 | # LSP-extended is all train. 129 | train_out = join(out_dir, 'train_%03d.tfrecord') 130 | package(all_images, labels, train_out, num_shards_train) 131 | else: 132 | train_out = join(out_dir, 'train_%03d.tfrecord') 133 | 134 | package(all_images[:1000], labels[:, :, :1000], train_out, 135 | num_shards_train) 136 | 137 | test_out = join(out_dir, 'test_%03d.tfrecord') 138 | package(all_images[1000:], labels[:, :, 1000:], test_out, 139 | num_shards_test) 140 | 141 | 142 | def main(unused_argv): 143 | print('Saving results to %s' % FLAGS.output_directory) 144 | 145 | if not exists(FLAGS.output_directory): 146 | makedirs(FLAGS.output_directory) 147 | process_lsp(FLAGS.img_directory, FLAGS.output_directory, 148 | FLAGS.train_shards, FLAGS.validation_shards) 149 | 150 | 151 | if __name__ == '__main__': 152 | tf.app.run() 153 | -------------------------------------------------------------------------------- /src_ortho/datasets/mpi_inf_3dhp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/mpi_inf_3dhp/__init__.py -------------------------------------------------------------------------------- /src_ortho/datasets/mpi_inf_3dhp/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/mpi_inf_3dhp/__init__.pyc -------------------------------------------------------------------------------- /src_ortho/datasets/mpi_inf_3dhp/read_mpi_inf_3dhp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Open up mpi_inf_3dhp. 3 | 4 | TRAINING: 5 | For each subject & sequence there is annot.mat 6 | What is in annot.mat: 7 | 'frames': number of frames, N 8 | 'univ_annot3': (14,) for each camera of N x 84 -> Why is there univ for each camera if it's univ..? 9 | 'annot3': (14,) for each camera of N x 84 10 | 'annot2': (14,) for each camera of N x 56 11 | 'cameras': 12 | 13 | In total there are 28 joints, but H3.6M subsets are used. 14 | 15 | The image frames are unpacked in: 16 | BASE_DIR/S%d/Seq%d/video_%d/frame_%06.jpg 17 | 18 | 19 | TESTING: 20 | 'valid_frame': N_frames x 1 21 | 'annot2': N_frames x 1 x 17 x 2 22 | 'annot3': N_frames x 1 x 17 x 3 23 | 'univ_annot3': N_frames x 1 x 17 x 3 24 | 'bb_crop': this is N_frames x 34 (not sure what this is..) 25 | 'activity_annotation': N_frames x 1 (of integer indicating activity type 26 | The test images are already in jpg. 
27 | """ 28 | from __future__ import absolute_import 29 | from __future__ import division 30 | from __future__ import print_function 31 | 32 | import numpy as np 33 | 34 | from os.path import join 35 | 36 | 37 | def get_paths(base_dir, sub_id, seq_id): 38 | data_dir = join(base_dir, 'S%d' % sub_id, 'Seq%d' % seq_id) 39 | anno_path = join(data_dir, 'annot.mat') 40 | img_dir = join(data_dir, 'imageFrames') 41 | return img_dir, anno_path 42 | 43 | 44 | def read_mat(path): 45 | from scipy.io import loadmat 46 | res = loadmat(path, struct_as_record=True, squeeze_me=True) 47 | 48 | cameras = res['cameras'] 49 | annot2 = np.stack(res['annot2']) 50 | annot3 = np.stack(res['annot3']) 51 | frames = res['frames'] 52 | 53 | # univ_annot3 = np.stack(res['univ_annot3']) 54 | 55 | return frames, cameras, annot2, annot3 56 | 57 | 58 | def mpi_inf_3dhp_to_lsp_idx(): 59 | # For training, this joint_idx gives names 17 60 | raw_to_h36m17_idx = np.array( 61 | [8, 6, 15, 16, 17, 10, 11, 12, 24, 25, 26, 19, 20, 21, 5, 4, 7]) - 1 62 | names_17 = [ 63 | 'Head', 'Neck', 'R Shoulder', 'R Elbow', 'R Wrist', 'L Shoulder', 64 | 'L Elbow', 'L Wrist', 'R Hip', 'R Knee', 'R Ankle', 'L Hip', 'L Knee', 65 | 'L Ankle', 'Pelvis', 'Spine', 'Head' 66 | ] 67 | want_names = [ 68 | 'R Ankle', 'R Knee', 'R Hip', 'L Hip', 'L Knee', 'L Ankle', 'R Wrist', 69 | 'R Elbow', 'R Shoulder', 'L Shoulder', 'L Elbow', 'L Wrist', 'Neck', 70 | 'Head' 71 | ] 72 | 73 | h36m17_to_lsp_idx = [names_17.index(j) for j in want_names] 74 | 75 | raw_to_lsp_idx = raw_to_h36m17_idx[h36m17_to_lsp_idx] 76 | 77 | return raw_to_lsp_idx, h36m17_to_lsp_idx 78 | 79 | 80 | def read_camera(base_dir): 81 | cam_path = join(base_dir, 'S1/Seq1/camera.calibration') 82 | lines = [] 83 | with open(cam_path, 'r') as f: 84 | for line in f: 85 | content = [x for x in line.strip().split(' ') if x] 86 | lines.append(content) 87 | 88 | def get_cam_info(block): 89 | cam_id = int(block[0][1]) 90 | # Intrinsic 91 | intrinsic = block[4][1:] 92 | K = np.array([np.float(cont) for cont in intrinsic]).reshape(4, 4) 93 | # Extrinsic: 94 | extrinsic = block[5][1:] 95 | Ext = np.array([float(cont) for cont in extrinsic]).reshape(4, 4) 96 | return cam_id, K, Ext 97 | 98 | # Skip header 99 | lines = lines[1:] 100 | # each camera is 7 lines long. 
101 | num_cams = int(len(lines) / 7) 102 | cams = {} 103 | for i in range(num_cams): 104 | cam_id, K, Ext = get_cam_info(lines[7 * i:7 * i + 7]) 105 | cams[cam_id] = K 106 | 107 | return cams 108 | -------------------------------------------------------------------------------- /src_ortho/datasets/mpi_inf_3dhp/read_mpi_inf_3dhp.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/mpi_inf_3dhp/read_mpi_inf_3dhp.pyc -------------------------------------------------------------------------------- /src_ortho/datasets/mpi_inf_3dhp_test_to_tfrecords.py: -------------------------------------------------------------------------------- 1 | """ Convert MPI_INF_3DHP to TFRecords """ 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | from os.path import join, exists 7 | from os import makedirs 8 | 9 | import numpy as np 10 | 11 | import tensorflow as tf 12 | 13 | from .common import convert_to_example_wmosh, ImageCoder, resize_img 14 | from .mpi_inf_3dhp.read_mpi_inf_3dhp import get_paths, read_mat, mpi_inf_3dhp_to_lsp_idx, read_camera 15 | 16 | tf.app.flags.DEFINE_string('data_directory', '/scratch1/storage/mpi_inf_3dhp/', 17 | 'data directory: top of mpi-inf-3dhp') 18 | tf.app.flags.DEFINE_string('output_directory', 19 | '/scratch1/projects/tf_datasets/mpi_inf_3dhp/', 20 | 'Output data directory') 21 | 22 | tf.app.flags.DEFINE_string('split', 'test', 'train or val') 23 | tf.app.flags.DEFINE_integer('train_shards', 500, 24 | 'Number of shards in training TFRecord files.') 25 | 26 | FLAGS = tf.app.flags.FLAGS 27 | MIN_VIS_PTS = 8 # This many points must be within the image. 28 | 29 | # To go to h36m joints: 30 | # training joints have 28 joints 31 | # test joints are 17 (H3.6M subset in CPM order) 32 | joint_idx2lsp, test_idx2lsp = mpi_inf_3dhp_to_lsp_idx() 33 | 34 | 35 | def sample_frames(gt3ds): 36 | use_these = np.zeros(gt3ds.shape[0], bool) 37 | # Always use_these first frame. 38 | use_these[0] = True 39 | prev_kp3d = gt3ds[0] 40 | for itr, kp3d in enumerate(gt3ds): 41 | if itr > 0: 42 | # Check if any joint moved more than 200mm. 43 | if not np.any(np.linalg.norm(prev_kp3d - kp3d, axis=1) >= 200): 44 | continue 45 | use_these[itr] = True 46 | prev_kp3d = kp3d 47 | 48 | return use_these 49 | 50 | 51 | def get_all_data(base_dir, sub_id, seq_id, cam_ids): 52 | data_dir = join(base_dir, 'TS%d' % sub_id) 53 | anno_path = join(data_dir, 'annot_data.mat') 54 | img_dir = join(data_dir, 'imageSequence') 55 | # Get data for all cameras. 56 | import h5py 57 | res = h5py.File(anno_path, 'r') 58 | 59 | valid_frame = np.array(res['valid_frame'], np.int32)[:,0].astype(bool) 60 | univ_annot3 = np.array(res['univ_annot3']) 61 | print("valid_frame:",valid_frame.shape) 62 | print("annot3:",univ_annot3.shape) 63 | 64 | all_gt2ds, all_gt3ds, all_img_paths = [], [], [] 65 | all_cams = [] 66 | for cam_id in cam_ids: 67 | base_path = join(img_dir, 'crop_%06d.jpg') 68 | num_frames = univ_annot3.shape[0] 69 | gt2ds = np.zeros((num_frames, 14, 2), np.float32) 70 | gt3ds = univ_annot3.reshape(num_frames, -1, 3) 71 | # Convert N x 28 x . 
to N x 14 x 2, N x 14 x 3 72 | gt3ds = gt3ds[:, test_idx2lsp, :] 73 | img_paths = [base_path % (frame + 1) for frame in range(num_frames)] 74 | if gt3ds.shape[0] != len(img_paths): 75 | print('Not same paths?') 76 | import ipdb 77 | ipdb.set_trace() 78 | use_these = valid_frame 79 | all_gt2ds.append(gt2ds[use_these]) 80 | all_gt3ds.append(gt3ds[use_these]) 81 | cams = np.zeros((gt2ds[use_these].shape[0], 3), np.float32) 82 | all_cams.append(cams) 83 | all_img_paths.append(np.array(img_paths)[use_these].tolist()) 84 | 85 | all_gt2ds = np.transpose(np.array(all_gt2ds), [1,0,2,3]) 86 | print(all_gt2ds.shape) 87 | all_gt3ds = np.transpose(np.array(all_gt3ds), [1,0,2,3]) 88 | all_cams = np.transpose(np.array(all_cams), [1,0,2]) 89 | all_img_paths = np.transpose(np.array(all_img_paths), [1,0]) 90 | print(all_img_paths.shape) 91 | 92 | return all_img_paths, all_gt2ds, all_gt3ds, all_cams 93 | 94 | 95 | def check_good(image, gt2d): 96 | h, w, _ = image.shape 97 | x_in = np.logical_and(gt2d[:, 0] < w, gt2d[:, 0] >= 0) 98 | y_in = np.logical_and(gt2d[:, 1] < h, gt2d[:, 1] >= 0) 99 | 100 | ok_pts = np.logical_and(x_in, y_in) 101 | 102 | return np.sum(ok_pts) >= MIN_VIS_PTS 103 | 104 | 105 | def add_to_tfrecord(im_paths, 106 | gt2ds, 107 | gt3ds, 108 | cams, 109 | coder, 110 | writer, 111 | model=None, 112 | sub_path=None): 113 | """ 114 | gt2ds is 4 * 14 x 2 (lsp order) 115 | gt3ds is 4 * 14 x 3 116 | cam is (4,3,) 117 | returns: 118 | success = 1 if this is a good image 119 | 0 if most of the kps are outside the image 120 | """ 121 | # Read image 122 | images, labels, heights, widths = [], [], [], [] 123 | center_scaleds, scale_factorss = [], [] 124 | start_pts, cam_scaleds = [], [] 125 | for path, gt2d, cam in zip(im_paths, gt2ds, cams): 126 | if not exists(path): 127 | print('!!--%s doesnt exist! Skipping..--!!' % path) 128 | return False 129 | with tf.gfile.FastGFile(path, 'rb') as f: 130 | image_data = f.read() 131 | image = coder.decode_jpeg(coder.png_to_jpeg(image_data)) 132 | assert image.shape[2] == 3 133 | 134 | # All kps are visible in mpi_inf_3dhp. 135 | min_pt = np.array([0,0], np.float32) 136 | max_pt = np.array([300,300], np.float32) 137 | center = (min_pt + max_pt) / 2. 138 | scale = 1. 139 | 140 | image_scaled, scale_factors = resize_img(image, scale) 141 | height, width = image_scaled.shape[:2] 142 | joints_scaled = np.copy(gt2d) 143 | joints_scaled[:, 0] *= scale_factors[0] 144 | joints_scaled[:, 1] *= scale_factors[1] 145 | center_scaled = np.round(center * scale_factors).astype(np.int) 146 | # scale camera: Flength, px, py 147 | cam_scaled = np.copy(cam) 148 | 149 | # Crop 300x300 around the center 150 | margin = 150 151 | start_pt = np.maximum(center_scaled - margin, 0).astype(int) 152 | end_pt = (center_scaled + margin).astype(int) 153 | end_pt[0] = min(end_pt[0], width) 154 | end_pt[1] = min(end_pt[1], height) 155 | image_scaled = image_scaled[start_pt[1]:end_pt[1], start_pt[0]:end_pt[ 156 | 0], :] 157 | # Update others too. 
158 | joints_scaled[:, 0] -= start_pt[0] 159 | joints_scaled[:, 1] -= start_pt[1] 160 | center_scaled -= start_pt 161 | # Update principal point: 162 | cam_scaled[1] -= start_pt[0] 163 | cam_scaled[2] -= start_pt[1] 164 | height, width = image_scaled.shape[:2] 165 | 166 | # Encode image: 167 | image_data_scaled = coder.encode_jpeg(image_scaled) 168 | label = np.vstack([joints_scaled.T, np.ones((1, joints_scaled.shape[0]))]) 169 | images.append(image_data_scaled) 170 | labels.append(label) 171 | heights.append(height) 172 | widths.append(width) 173 | center_scaleds.append(center_scaled) 174 | scale_factorss.append(scale_factors) 175 | start_pts.append(start_pt) 176 | cam_scaleds.append(cam_scaled) 177 | # pose and shape is not existent. 178 | pose, shape = [None,None,None,None], [None,None,None,None] 179 | example = convert_to_example_wmosh( 180 | images, im_paths, heights, widths, labels, center_scaleds, gt3ds / 1000., 181 | pose, shape, scale_factorss, start_pts, cam_scaleds) 182 | writer.write(example.SerializeToString()) 183 | 184 | return True 185 | 186 | 187 | def save_to_tfrecord(out_name, im_paths, gt2ds, gt3ds, cams, num_shards): 188 | coder = ImageCoder() 189 | i = 0 190 | # Count on shards 191 | fidx = 0 192 | # Count failures 193 | num_bad = 0 194 | while i < len(im_paths): 195 | tf_filename = out_name % fidx 196 | print('Starting tfrecord file %s' % tf_filename) 197 | with tf.python_io.TFRecordWriter(tf_filename) as writer: 198 | j = 0 199 | while i < len(im_paths) and j < num_shards: 200 | if i % 100 == 0: 201 | print('Reading img %d/%d' % (i, len(im_paths))) 202 | success = add_to_tfrecord(im_paths[i], gt2ds[i], gt3ds[i], 203 | cams[i], coder, writer) 204 | i += 1 205 | if success: 206 | j += 1 207 | else: 208 | num_bad += 1 209 | 210 | fidx += 1 211 | 212 | print('Done, wrote to %s, num skipped %d' % (out_name, num_bad)) 213 | 214 | 215 | def process_mpi_inf_3dhp_train(data_dir, out_dir): 216 | out_dir = join(out_dir, 'test') 217 | sub_ids = range(1, 7) 218 | seq_ids = range(1, 2) 219 | cam_ids = [0,0,0,0] 220 | 221 | if not exists(out_dir): 222 | makedirs(out_dir) 223 | 224 | out_path = join(out_dir, FLAGS.split + '_%04d.tfrecord') 225 | num_shards = FLAGS.train_shards 226 | 227 | # Load all data & shuffle it,, 228 | all_gt2ds, all_gt3ds, all_img_paths = [], [], [] 229 | all_cams = [] 230 | 231 | for sub_id in sub_ids: 232 | for seq_id in seq_ids: 233 | print('collecting S%d, Seq%d' % (sub_id, seq_id)) 234 | # Collect all data for each camera. 235 | # img_paths: N list 236 | # gt2ds/gt3ds: N x 17 x 2, N x 17 x 3 237 | img_paths, gt2ds, gt3ds, cams = get_all_data( 238 | data_dir, sub_id, seq_id, cam_ids) 239 | 240 | all_img_paths.append(img_paths) 241 | all_gt2ds.append(gt2ds) 242 | all_gt3ds.append(gt3ds) 243 | all_cams.append(cams) 244 | 245 | all_gt2ds = np.vstack(all_gt2ds) 246 | all_gt3ds = np.vstack(all_gt3ds) 247 | all_cams = np.vstack(all_cams) 248 | all_img_paths = np.vstack(all_img_paths) 249 | assert (all_gt3ds.shape[0] == len(all_img_paths)) 250 | # Now shuffle it all. 
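    # A single shared permutation keeps image paths, 2D/3D labels and camera params aligned.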
251 | shuffle_id = np.random.permutation(len(all_img_paths)) 252 | all_img_paths = all_img_paths[shuffle_id] 253 | all_gt2ds = all_gt2ds[shuffle_id] 254 | all_gt3ds = all_gt3ds[shuffle_id] 255 | all_cams = all_cams[shuffle_id] 256 | 257 | save_to_tfrecord(out_path, all_img_paths, all_gt2ds, all_gt3ds, all_cams, 258 | num_shards) 259 | 260 | 261 | def main(unused_argv): 262 | print('Saving results to %s' % FLAGS.output_directory) 263 | 264 | if not exists(FLAGS.output_directory): 265 | makedirs(FLAGS.output_directory) 266 | 267 | process_mpi_inf_3dhp_train( 268 | FLAGS.data_directory, FLAGS.output_directory) 269 | 270 | 271 | if __name__ == '__main__': 272 | tf.app.run() 273 | -------------------------------------------------------------------------------- /src_ortho/datasets/mpi_inf_3dhp_to_tfrecords.py: -------------------------------------------------------------------------------- 1 | """ Convert MPI_INF_3DHP to TFRecords """ 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | from os.path import join, exists 7 | from os import makedirs 8 | 9 | import numpy as np 10 | 11 | import tensorflow as tf 12 | 13 | from .common import convert_to_example_wmosh, ImageCoder, resize_img 14 | from .mpi_inf_3dhp.read_mpi_inf_3dhp import get_paths, read_mat, mpi_inf_3dhp_to_lsp_idx, read_camera 15 | 16 | tf.app.flags.DEFINE_string('data_directory', '/scratch1/storage/mpi_inf_3dhp/', 17 | 'data directory: top of mpi-inf-3dhp') 18 | tf.app.flags.DEFINE_string('output_directory', 19 | '/scratch1/projects/tf_datasets/mpi_inf_3dhp/', 20 | 'Output data directory') 21 | 22 | tf.app.flags.DEFINE_string('split', 'train', 'train or val') 23 | tf.app.flags.DEFINE_integer('train_shards', 500, 24 | 'Number of shards in training TFRecord files.') 25 | 26 | FLAGS = tf.app.flags.FLAGS 27 | MIN_VIS_PTS = 8 # This many points must be within the image. 28 | 29 | # To go to h36m joints: 30 | # training joints have 28 joints 31 | # test joints are 17 (H3.6M subset in CPM order) 32 | joint_idx2lsp, test_idx2lsp = mpi_inf_3dhp_to_lsp_idx() 33 | 34 | 35 | def sample_frames(gt3ds): 36 | use_these = np.zeros(gt3ds.shape[0], bool) 37 | # Always use_these first frame. 38 | use_these[0] = True 39 | prev_kp3d = gt3ds[0] 40 | for itr, kp3d in enumerate(gt3ds): 41 | if itr > 0: 42 | # Check if any joint moved more than 200mm. 43 | if not np.any(np.linalg.norm(prev_kp3d - kp3d, axis=1) >= 200): 44 | continue 45 | use_these[itr] = True 46 | prev_kp3d = kp3d 47 | 48 | return use_these 49 | 50 | 51 | def get_all_data(base_dir, sub_id, seq_id, cam_ids, all_cam_info): 52 | img_dir, anno_path = get_paths(base_dir, sub_id, seq_id) 53 | # Get data for all cameras. 54 | frames, _, annot2, annot3 = read_mat(anno_path) 55 | 56 | all_gt2ds, all_gt3ds, all_img_paths = [], [], [] 57 | all_cams = [] 58 | for cam_id in cam_ids: 59 | base_path = join(img_dir, 'video_%d' % cam_id, 'frame_%06d.jpg') 60 | num_frames = annot2[cam_id].shape[0] 61 | gt2ds = annot2[cam_id].reshape(num_frames, -1, 2) 62 | gt3ds = annot3[cam_id].reshape(num_frames, -1, 3) 63 | # Convert N x 28 x . 
to N x 14 x 2, N x 14 x 3 64 | gt2ds = gt2ds[:, joint_idx2lsp, :] 65 | gt3ds = gt3ds[:, joint_idx2lsp, :] 66 | img_paths = [base_path % (frame + 1) for frame in frames] 67 | if gt3ds.shape[0] != len(img_paths): 68 | print('Not same paths?') 69 | import ipdb 70 | ipdb.set_trace() 71 | use_these = sample_frames(gt3ds) 72 | all_gt2ds.append(gt2ds[use_these]) 73 | all_gt3ds.append(gt3ds[use_these]) 74 | K = all_cam_info[cam_id] 75 | flength = 0.5 * (K[0, 0] + K[1, 1]) 76 | ppt = K[:2, 2] 77 | flengths = np.tile(flength, (np.sum(use_these), 1)) 78 | ppts = np.tile(ppt, (np.sum(use_these), 1)) 79 | cams = np.hstack((flengths, ppts)) 80 | all_cams.append(cams) 81 | all_img_paths.append(np.array(img_paths)[use_these].tolist()) 82 | 83 | all_gt2ds = np.transpose(np.array(all_gt2ds), [1,0,2,3]) 84 | all_gt3ds = np.transpose(np.array(all_gt3ds), [1,0,2,3]) 85 | all_cams = np.transpose(np.array(all_cams), [1,0,2]) 86 | all_img_paths = np.transpose(np.array(all_img_paths), [1,0]) 87 | print(all_img_paths.shape) 88 | 89 | return all_img_paths, all_gt2ds, all_gt3ds, all_cams 90 | 91 | 92 | def check_good(image, gt2d): 93 | h, w, _ = image.shape 94 | x_in = np.logical_and(gt2d[:, 0] < w, gt2d[:, 0] >= 0) 95 | y_in = np.logical_and(gt2d[:, 1] < h, gt2d[:, 1] >= 0) 96 | 97 | ok_pts = np.logical_and(x_in, y_in) 98 | 99 | return np.sum(ok_pts) >= MIN_VIS_PTS 100 | 101 | 102 | def add_to_tfrecord(im_paths, 103 | gt2ds, 104 | gt3ds, 105 | cams, 106 | coder, 107 | writer, 108 | model=None, 109 | sub_path=None): 110 | """ 111 | gt2ds is 4 * 14 x 2 (lsp order) 112 | gt3ds is 4 * 14 x 3 113 | cam is (4,3,) 114 | returns: 115 | success = 1 if this is a good image 116 | 0 if most of the kps are outside the image 117 | """ 118 | # Read image 119 | images, labels, heights, widths = [], [], [], [] 120 | center_scaleds, scale_factorss = [], [] 121 | start_pts, cam_scaleds = [], [] 122 | for path, gt2d, cam in zip(im_paths, gt2ds, cams): 123 | if not exists(path): 124 | print('!!--%s doesnt exist! Skipping..--!!' % path) 125 | return False 126 | with tf.gfile.FastGFile(path, 'rb') as f: 127 | image_data = f.read() 128 | image = coder.decode_jpeg(coder.png_to_jpeg(image_data)) 129 | assert image.shape[2] == 3 130 | if np.mean(image) > 180: 131 | print("skipping {}".format(path)) 132 | return False 133 | 134 | good = check_good(image, gt2d) 135 | if not good: 136 | if FLAGS.split == 'test': 137 | print('Why no good?? shouldnt happen') 138 | import ipdb 139 | ipdb.set_trace() 140 | print("gt out of range!") 141 | return False 142 | 143 | # All kps are visible in mpi_inf_3dhp. 144 | min_pt = np.min(gt2d, axis=0) 145 | max_pt = np.max(gt2d, axis=0) 146 | person_height = np.linalg.norm(max_pt - min_pt) 147 | center = (min_pt + max_pt) / 2. 148 | scale = 150. 
/ person_height 149 | 150 | image_scaled, scale_factors = resize_img(image, scale) 151 | height, width = image_scaled.shape[:2] 152 | joints_scaled = np.copy(gt2d) 153 | joints_scaled[:, 0] *= scale_factors[0] 154 | joints_scaled[:, 1] *= scale_factors[1] 155 | center_scaled = np.round(center * scale_factors).astype(np.int) 156 | # scale camera: Flength, px, py 157 | cam_scaled = np.copy(cam) 158 | cam_scaled[0] *= scale 159 | cam_scaled[1] *= scale_factors[0] 160 | cam_scaled[2] *= scale_factors[1] 161 | 162 | # Crop 300x300 around the center 163 | margin = 150 164 | start_pt = np.maximum(center_scaled - margin, 0).astype(int) 165 | end_pt = (center_scaled + margin).astype(int) 166 | end_pt[0] = min(end_pt[0], width) 167 | end_pt[1] = min(end_pt[1], height) 168 | image_scaled = image_scaled[start_pt[1]:end_pt[1], start_pt[0]:end_pt[ 169 | 0], :] 170 | # Update others too. 171 | joints_scaled[:, 0] -= start_pt[0] 172 | joints_scaled[:, 1] -= start_pt[1] 173 | center_scaled -= start_pt 174 | # Update principal point: 175 | cam_scaled[1] -= start_pt[0] 176 | cam_scaled[2] -= start_pt[1] 177 | height, width = image_scaled.shape[:2] 178 | 179 | # Encode image: 180 | image_data_scaled = coder.encode_jpeg(image_scaled) 181 | label = np.vstack([joints_scaled.T, np.ones((1, joints_scaled.shape[0]))]) 182 | images.append(image_data_scaled) 183 | labels.append(label) 184 | heights.append(height) 185 | widths.append(width) 186 | center_scaleds.append(center_scaled) 187 | scale_factorss.append(scale_factors) 188 | start_pts.append(start_pt) 189 | cam_scaleds.append(cam_scaled) 190 | # pose and shape is not existent. 191 | pose, shape = [None,None,None,None], [None,None,None,None] 192 | example = convert_to_example_wmosh( 193 | images, im_paths, heights, widths, labels, center_scaleds, gt3ds / 1000., 194 | pose, shape, scale_factorss, start_pts, cam_scaleds) 195 | writer.write(example.SerializeToString()) 196 | 197 | return True 198 | 199 | 200 | def save_to_tfrecord(out_name, im_paths, gt2ds, gt3ds, cams, num_shards): 201 | coder = ImageCoder() 202 | i = 0 203 | # Count on shards 204 | fidx = 0 205 | # Count failures 206 | num_bad = 0 207 | while i < len(im_paths): 208 | tf_filename = out_name % fidx 209 | print('Starting tfrecord file %s' % tf_filename) 210 | with tf.python_io.TFRecordWriter(tf_filename) as writer: 211 | j = 0 212 | while i < len(im_paths) and j < num_shards: 213 | if i % 100 == 0: 214 | print('Reading img %d/%d' % (i, len(im_paths))) 215 | success = add_to_tfrecord(im_paths[i], gt2ds[i], gt3ds[i], 216 | cams[i], coder, writer) 217 | i += 1 218 | if success: 219 | j += 1 220 | else: 221 | num_bad += 1 222 | 223 | fidx += 1 224 | 225 | print('Done, wrote to %s, num skipped %d' % (out_name, num_bad)) 226 | 227 | 228 | def process_mpi_inf_3dhp_train(data_dir, out_dir, is_train=False): 229 | if is_train: 230 | out_dir = join(out_dir, 'train') 231 | print('!train set!') 232 | sub_ids = range(1, 8) # No S8! 233 | seq_ids = range(1, 3) 234 | cam_ids = [8,0,2,7] 235 | else: # Full set!! 
236 | out_dir = join(out_dir, 'val') 237 | print('doing the full train-val set!') 238 | sub_ids = range(8, 9) 239 | seq_ids = range(1, 3) 240 | cam_ids = [8,0,2,7] 241 | 242 | if not exists(out_dir): 243 | makedirs(out_dir) 244 | 245 | out_path = join(out_dir, FLAGS.split + '_%04d.tfrecord') 246 | num_shards = FLAGS.train_shards 247 | 248 | # Load all data & shuffle it,, 249 | all_gt2ds, all_gt3ds, all_img_paths = [], [], [] 250 | all_cams = [] 251 | all_cam_info = read_camera(data_dir) 252 | 253 | for sub_id in sub_ids: 254 | for seq_id in seq_ids: 255 | print('collecting S%d, Seq%d' % (sub_id, seq_id)) 256 | if (sub_id==4 and seq_id==2): # 4-2,(5-1,5-2?), 257 | print("skipping S{} Seq{}".format(sub_id, seq_id)) 258 | continue 259 | # Collect all data for each camera. 260 | # img_paths: N list 261 | # gt2ds/gt3ds: N x 17 x 2, N x 17 x 3 262 | img_paths, gt2ds, gt3ds, cams = get_all_data( 263 | data_dir, sub_id, seq_id, cam_ids, all_cam_info) 264 | 265 | all_img_paths.append(img_paths) 266 | all_gt2ds.append(gt2ds) 267 | all_gt3ds.append(gt3ds) 268 | all_cams.append(cams) 269 | 270 | all_gt2ds = np.vstack(all_gt2ds) 271 | all_gt3ds = np.vstack(all_gt3ds) 272 | all_cams = np.vstack(all_cams) 273 | all_img_paths = np.vstack(all_img_paths) 274 | assert (all_gt3ds.shape[0] == len(all_img_paths)) 275 | # Now shuffle it all. 276 | shuffle_id = np.random.permutation(len(all_img_paths)) 277 | all_img_paths = all_img_paths[shuffle_id] 278 | all_gt2ds = all_gt2ds[shuffle_id] 279 | all_gt3ds = all_gt3ds[shuffle_id] 280 | all_cams = all_cams[shuffle_id] 281 | 282 | save_to_tfrecord(out_path, all_img_paths, all_gt2ds, all_gt3ds, all_cams, 283 | num_shards) 284 | 285 | 286 | def main(unused_argv): 287 | print('Saving results to %s' % FLAGS.output_directory) 288 | 289 | if not exists(FLAGS.output_directory): 290 | makedirs(FLAGS.output_directory) 291 | 292 | if FLAGS.split == 'train' or FLAGS.split == 'val': 293 | is_train = FLAGS.split == 'train' 294 | process_mpi_inf_3dhp_train( 295 | FLAGS.data_directory, FLAGS.output_directory, is_train=is_train) 296 | else: 297 | print('Unknown split %s' % FLAGS.split) 298 | import ipdb 299 | ipdb.set_trace() 300 | 301 | 302 | if __name__ == '__main__': 303 | tf.app.run() 304 | -------------------------------------------------------------------------------- /src_ortho/datasets/mpii_to_tfrecords.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert MPII to TFRecords. 
3 | """ 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | from os import makedirs 9 | from os.path import join, exists 10 | from time import time 11 | 12 | import numpy as np 13 | 14 | import tensorflow as tf 15 | 16 | from .common import convert_to_example, ImageCoder, resize_img 17 | 18 | tf.app.flags.DEFINE_string('img_directory', 19 | '/scratch1/storage/human_datasets/mpii', 20 | 'image data directory') 21 | tf.app.flags.DEFINE_string( 22 | 'output_directory', '/Users/kanazawa/projects/datasets/tf_datasets/mpii', 23 | 'Output data directory') 24 | 25 | tf.app.flags.DEFINE_integer('train_shards', 500, 26 | 'Number of shards in training TFRecord files.') 27 | tf.app.flags.DEFINE_integer('validation_shards', 500, 28 | 'Number of shards in validation TFRecord files.') 29 | 30 | FLAGS = tf.app.flags.FLAGS 31 | 32 | 33 | def load_anno(fname): 34 | import scipy.io as sio 35 | t0 = time() 36 | print('Reading annotation..') 37 | res = sio.loadmat(fname, struct_as_record=False, squeeze_me=True) 38 | print('took %g sec..' % (time() - t0)) 39 | 40 | return res['RELEASE'] 41 | 42 | 43 | def convert_is_visible(is_visible): 44 | """ 45 | this field is u'1' or empty numpy array.. 46 | """ 47 | if isinstance(is_visible, np.ndarray): 48 | assert (is_visible.size == 0) 49 | return 0 50 | else: 51 | return int(is_visible) 52 | 53 | 54 | def read_joints(rect): 55 | """ 56 | Reads joints in the common joint order. 57 | Assumes rect has annopoints as field. 58 | 59 | Returns: 60 | joints: 3 x |common joints| 61 | """ 62 | # Mapping from MPII joints to LSP joints (0:13). In this roder: 63 | _COMMON_JOINT_IDS = [ 64 | 0, # R ankle 65 | 1, # R knee 66 | 2, # R hip 67 | 3, # L hip 68 | 4, # L knee 69 | 5, # L ankle 70 | 10, # R Wrist 71 | 11, # R Elbow 72 | 12, # R shoulder 73 | 13, # L shoulder 74 | 14, # L Elbow 75 | 15, # L Wrist 76 | 8, # Neck top 77 | 9, # Head top 78 | ] 79 | assert ('annopoints' in rect._fieldnames) 80 | points = rect.annopoints.point 81 | if not isinstance(points, np.ndarray): 82 | # There is only one! so ignore this image 83 | return None 84 | # Not all joints are there.. read points in a dict. 85 | read_points = {} 86 | 87 | for point in points: 88 | vis = convert_is_visible(point.is_visible) 89 | read_points[point.id] = np.array([point.x, point.y, vis]) 90 | 91 | # Go over each common joint ids 92 | joints = np.zeros((3, len(_COMMON_JOINT_IDS))) 93 | for i, jid in enumerate(_COMMON_JOINT_IDS): 94 | if jid in read_points.keys(): 95 | joints[:, i] = read_points[jid] 96 | # If it's annotated, then use it as visible 97 | # (in this visible = 0 iff no gt label) 98 | joints[2, i] = 1. 99 | 100 | return joints 101 | 102 | 103 | def parse_people(anno_info, single_persons): 104 | ''' 105 | Parses people from rect annotation. 106 | Assumes input is train data. 107 | Input: 108 | img_dir: str 109 | anno_info: annolist[img_id] obj 110 | single_persons: rect id idx for "single" people 111 | 112 | Returns: 113 | people - list of annotated single-people in this image. 114 | Its Entries are tuple (label, img_scale, obj_pos) 115 | ''' 116 | # No single persons in this image. 
117 | if single_persons.size == 0: 118 | return [] 119 | 120 | rects = anno_info.annorect 121 | if not isinstance(rects, np.ndarray): 122 | rects = np.array([rects]) 123 | 124 | # Read each human: 125 | people = [] 126 | 127 | for ridx in single_persons: 128 | rect = rects[ridx - 1] 129 | pos = np.array([rect.objpos.x, rect.objpos.y]) 130 | joints = read_joints(rect) 131 | if joints is None: 132 | continue 133 | # Compute the scale using the keypoints so the person is 150px. 134 | visible = joints[2, :].astype(bool) 135 | # If ankles are visible 136 | if visible[0] or visible[5]: 137 | min_pt = np.min(joints[:2, visible], axis=1) 138 | max_pt = np.max(joints[:2, visible], axis=1) 139 | person_height = np.linalg.norm(max_pt - min_pt) 140 | scale = 150. / person_height 141 | else: 142 | # Torso points left should, right shold, right hip, left hip 143 | # torso_points = joints[:, [8, 9, 3, 2]] 144 | torso_heights = [] 145 | if visible[13] and visible[2]: 146 | torso_heights.append( 147 | np.linalg.norm(joints[:2, 13] - joints[:2, 2])) 148 | if visible[13] and visible[3]: 149 | torso_heights.append( 150 | np.linalg.norm(joints[:2, 13] - joints[:2, 3])) 151 | # Make torso 75px 152 | if len(torso_heights) > 0: 153 | scale = 75. / np.mean(torso_heights) 154 | else: 155 | if visible[8] and visible[2]: 156 | torso_heights.append( 157 | np.linalg.norm(joints[:2, 8] - joints[:2, 2])) 158 | if visible[9] and visible[3]: 159 | torso_heights.append( 160 | np.linalg.norm(joints[:2, 9] - joints[:2, 3])) 161 | if len(torso_heights) > 0: 162 | scale = 56. / np.mean(torso_heights) 163 | else: 164 | # Skip, person is too close. 165 | continue 166 | 167 | people.append((joints, scale, pos)) 168 | 169 | return people 170 | 171 | 172 | def add_to_tfrecord(anno, img_id, img_dir, coder, writer, is_train): 173 | """ 174 | Add each "single person" in this image. 175 | anno - the entire annotation file. 176 | 177 | Returns: 178 | The number of people added. 179 | """ 180 | anno_info = anno.annolist[img_id] 181 | # Make it consistent,, always a numpy array. 182 | single_persons = anno.single_person[img_id] 183 | if not isinstance(single_persons, np.ndarray): 184 | single_persons = np.array([single_persons]) 185 | 186 | people = parse_people(anno_info, single_persons) 187 | 188 | if len(people) == 0: 189 | return 0 190 | 191 | # Add each people to tf record 192 | image_path = join(img_dir, anno_info.image.name) 193 | with tf.gfile.FastGFile(image_path, 'rb') as f: 194 | image_data = f.read() 195 | image = coder.decode_jpeg(image_data) 196 | 197 | for joints, scale, pos in people: 198 | # Scale image: 199 | image_scaled, scale_factors = resize_img(image, scale) 200 | height, width = image_scaled.shape[:2] 201 | joints_scaled = np.copy(joints) 202 | joints_scaled[0, :] *= scale_factors[0] 203 | joints_scaled[1, :] *= scale_factors[1] 204 | 205 | visible = joints[2, :].astype(bool) 206 | min_pt = np.min(joints_scaled[:2, visible], axis=1) 207 | max_pt = np.max(joints_scaled[:2, visible], axis=1) 208 | center = (min_pt + max_pt) / 2. 209 | 210 | ## Crop 600x600 around this image.. 211 | margin = 300 212 | start_pt = np.maximum(center - margin, 0).astype(int) 213 | end_pt = (center + margin).astype(int) 214 | end_pt[0] = min(end_pt[0], width) 215 | end_pt[1] = min(end_pt[1], height) 216 | image_scaled = image_scaled[start_pt[1]:end_pt[1], start_pt[0]:end_pt[ 217 | 0], :] 218 | # Update others oo. 
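Editor's note — a small stand-alone sketch (not part of the original file) of the bookkeeping that follows: once the image has been cropped at start_pt, every 2D quantity (joints, center) must be re-expressed in the crop's coordinate frame by subtracting the crop origin, which is what the updates below do. Toy numbers:

```
import numpy as np

start_pt = np.array([40, 25])                     # crop origin (x, y)
joints_scaled = np.array([[100.], [80.], [1.]])   # one joint: x, y, visibility rows
joints_scaled[0, :] -= start_pt[0]                # x in crop frame -> 60
joints_scaled[1, :] -= start_pt[1]                # y in crop frame -> 55
```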
219 | joints_scaled[0, :] -= start_pt[0] 220 | joints_scaled[1, :] -= start_pt[1] 221 | center -= start_pt 222 | height, width = image_scaled.shape[:2] 223 | 224 | # Encode image: 225 | image_data_scaled = coder.encode_jpeg(image_scaled) 226 | 227 | example = convert_to_example(image_data_scaled, image_path, height, 228 | width, joints_scaled, center) 229 | writer.write(example.SerializeToString()) 230 | 231 | # Finally return how many were written. 232 | return len(people) 233 | 234 | 235 | def process_mpii(anno, img_dir, out_dir, num_shards, is_train=True): 236 | all_ids = np.array(range(len(anno.annolist))) 237 | if is_train: 238 | out_path = join(out_dir, 'train_%03d.tfrecord') 239 | img_inds = all_ids[anno.img_train.astype('bool')] 240 | else: 241 | out_path = join(out_dir, 'test_%03d.tfrecord') 242 | img_inds = all_ids[np.logical_not(anno.img_train)] 243 | print('Not implemented for test data') 244 | exit(1) 245 | 246 | # MPII annotation is tricky (maybe the way scipy reads them) 247 | # If there's only 1 person in the image, annorect is not an array 248 | # So just go over each image, and add every single_person in that image 249 | # add_to_tfrecords returns the # of ppl added. 250 | # So it's possible some shards go over the limit but this is ok. 251 | 252 | coder = ImageCoder() 253 | 254 | i = 0 255 | # Count on shards 256 | fidx = 0 257 | num_ppl = 0 258 | while i < len(img_inds): 259 | 260 | tf_filename = out_path % fidx 261 | print('Starting tfrecord file %s' % tf_filename) 262 | with tf.python_io.TFRecordWriter(tf_filename) as writer: 263 | # Count on total ppl in each shard 264 | num_ppl = 0 265 | while i < len(img_inds) and num_ppl < num_shards: 266 | if i % 100 == 0: 267 | print('Reading img %d/%d' % (i, len(img_inds))) 268 | num_ppl += add_to_tfrecord(anno, img_inds[i], img_dir, coder, 269 | writer, is_train) 270 | i += 1 271 | 272 | fidx += 1 273 | 274 | 275 | def main(unused_argv): 276 | print('Saving results to %s' % FLAGS.output_directory) 277 | 278 | if not exists(FLAGS.output_directory): 279 | makedirs(FLAGS.output_directory) 280 | 281 | anno_mat = join(FLAGS.img_directory, 'annotations', 282 | 'mpii_human_pose_v1_u12_1.mat') 283 | anno = load_anno(anno_mat) 284 | 285 | img_dir = join(FLAGS.img_directory, 'images') 286 | process_mpii( 287 | anno, 288 | img_dir, 289 | FLAGS.output_directory, 290 | FLAGS.train_shards, 291 | is_train=True) 292 | 293 | 294 | if __name__ == '__main__': 295 | tf.app.run() 296 | -------------------------------------------------------------------------------- /src_ortho/datasets/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /src_ortho/datasets/pycocotools/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/pycocotools/__init__.pyc -------------------------------------------------------------------------------- /src_ortho/datasets/pycocotools/_mask.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c 2 | # distutils: sources = ../common/maskApi.c 3 | 4 | #************************************************************************** 5 | # Microsoft COCO Toolbox. 
version 2.0 6 | # Data, paper, and tutorials available at: http://mscoco.org/ 7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 8 | # Licensed under the Simplified BSD License [see coco/license.txt] 9 | #************************************************************************** 10 | 11 | __author__ = 'tsungyi' 12 | 13 | import sys 14 | PYTHON_VERSION = sys.version_info[0] 15 | 16 | # import both Python-level and C-level symbols of Numpy 17 | # the API uses Numpy to interface C and Python 18 | import numpy as np 19 | cimport numpy as np 20 | from libc.stdlib cimport malloc, free 21 | 22 | # intialized Numpy. must do. 23 | np.import_array() 24 | 25 | # import numpy C function 26 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 27 | cdef extern from "numpy/arrayobject.h": 28 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 29 | 30 | # Declare the prototype of the C functions in MaskApi.h 31 | cdef extern from "maskApi.h": 32 | ctypedef unsigned int uint 33 | ctypedef unsigned long siz 34 | ctypedef unsigned char byte 35 | ctypedef double* BB 36 | ctypedef struct RLE: 37 | siz h, 38 | siz w, 39 | siz m, 40 | uint* cnts, 41 | void rlesInit( RLE **R, siz n ) 42 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 43 | void rleDecode( const RLE *R, byte *mask, siz n ) 44 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) 45 | void rleArea( const RLE *R, siz n, uint *a ) 46 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 47 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 48 | void rleToBbox( const RLE *R, BB bb, siz n ) 49 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 50 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 51 | char* rleToString( const RLE *R ) 52 | void rleFrString( RLE *R, char *s, siz h, siz w ) 53 | 54 | # python class to wrap RLE array in C 55 | # the class handles the memory allocation and deallocation 56 | cdef class RLEs: 57 | cdef RLE *_R 58 | cdef siz _n 59 | 60 | def __cinit__(self, siz n =0): 61 | rlesInit(&self._R, n) 62 | self._n = n 63 | 64 | # free the RLE array here 65 | def __dealloc__(self): 66 | if self._R is not NULL: 67 | for i in range(self._n): 68 | free(self._R[i].cnts) 69 | free(self._R) 70 | def __getattr__(self, key): 71 | if key == 'n': 72 | return self._n 73 | raise AttributeError(key) 74 | 75 | # python class to wrap Mask array in C 76 | # the class handles the memory allocation and deallocation 77 | cdef class Masks: 78 | cdef byte *_mask 79 | cdef siz _h 80 | cdef siz _w 81 | cdef siz _n 82 | 83 | def __cinit__(self, h, w, n): 84 | self._mask = malloc(h*w*n* sizeof(byte)) 85 | self._h = h 86 | self._w = w 87 | self._n = n 88 | # def __dealloc__(self): 89 | # the memory management of _mask has been passed to np.ndarray 90 | # it doesn't need to be freed here 91 | 92 | # called when passing into np.array() and return an np.ndarray in column-major order 93 | def __array__(self): 94 | cdef np.npy_intp shape[1] 95 | shape[0] = self._h*self._w*self._n 96 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 97 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 98 | # The _mask allocated by Masks is now handled by ndarray 99 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 100 | return ndarray 101 | 102 | # internal conversion from Python RLEs object to compressed RLE format 103 | def _toString(RLEs Rs): 104 | cdef siz 
n = Rs.n 105 | cdef bytes py_string 106 | cdef char* c_string 107 | objs = [] 108 | for i in range(n): 109 | c_string = rleToString( &Rs._R[i] ) 110 | py_string = c_string 111 | objs.append({ 112 | 'size': [Rs._R[i].h, Rs._R[i].w], 113 | 'counts': py_string 114 | }) 115 | free(c_string) 116 | return objs 117 | 118 | # internal conversion from compressed RLE format to Python RLEs object 119 | def _frString(rleObjs): 120 | cdef siz n = len(rleObjs) 121 | Rs = RLEs(n) 122 | cdef bytes py_string 123 | cdef char* c_string 124 | for i, obj in enumerate(rleObjs): 125 | if PYTHON_VERSION == 2: 126 | py_string = str(obj['counts']).encode('utf8') 127 | elif PYTHON_VERSION == 3: 128 | py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] 129 | else: 130 | raise Exception('Python version must be 2 or 3') 131 | c_string = py_string 132 | rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) 133 | return Rs 134 | 135 | # encode mask to RLEs objects 136 | # list of RLE string can be generated by RLEs member function 137 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): 138 | h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] 139 | cdef RLEs Rs = RLEs(n) 140 | rleEncode(Rs._R,mask.data,h,w,n) 141 | objs = _toString(Rs) 142 | return objs 143 | 144 | # decode mask from compressed list of RLE string or RLEs object 145 | def decode(rleObjs): 146 | cdef RLEs Rs = _frString(rleObjs) 147 | h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n 148 | masks = Masks(h, w, n) 149 | rleDecode(Rs._R, masks._mask, n); 150 | return np.array(masks) 151 | 152 | def merge(rleObjs, intersect=0): 153 | cdef RLEs Rs = _frString(rleObjs) 154 | cdef RLEs R = RLEs(1) 155 | rleMerge(Rs._R, R._R, Rs._n, intersect) 156 | obj = _toString(R)[0] 157 | return obj 158 | 159 | def area(rleObjs): 160 | cdef RLEs Rs = _frString(rleObjs) 161 | cdef uint* _a = malloc(Rs._n* sizeof(uint)) 162 | rleArea(Rs._R, Rs._n, _a) 163 | cdef np.npy_intp shape[1] 164 | shape[0] = Rs._n 165 | a = np.array((Rs._n, ), dtype=np.uint8) 166 | a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) 167 | PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) 168 | return a 169 | 170 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 171 | def iou( dt, gt, pyiscrowd ): 172 | def _preproc(objs): 173 | if len(objs) == 0: 174 | return objs 175 | if type(objs) == np.ndarray: 176 | if len(objs.shape) == 1: 177 | objs = objs.reshape((objs[0], 1)) 178 | # check if it's Nx4 bbox 179 | if not len(objs.shape) == 2 or not objs.shape[1] == 4: 180 | raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') 181 | objs = objs.astype(np.double) 182 | elif type(objs) == list: 183 | # check if list is in box format and convert it to np.ndarray 184 | isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 185 | isrle = np.all(np.array([type(obj) == dict for obj in objs])) 186 | if isbox: 187 | objs = np.array(objs, dtype=np.double) 188 | if len(objs.shape) == 1: 189 | objs = objs.reshape((1,objs.shape[0])) 190 | elif isrle: 191 | objs = _frString(objs) 192 | else: 193 | raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') 194 | else: 195 | raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 196 | return objs 197 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 198 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 199 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 200 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 201 | def _len(obj): 202 | cdef siz N = 0 203 | if type(obj) == RLEs: 204 | N = obj.n 205 | elif len(obj)==0: 206 | pass 207 | elif type(obj) == np.ndarray: 208 | N = obj.shape[0] 209 | return N 210 | # convert iscrowd to numpy array 211 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 212 | # simple type checking 213 | cdef siz m, n 214 | dt = _preproc(dt) 215 | gt = _preproc(gt) 216 | m = _len(dt) 217 | n = _len(gt) 218 | if m == 0 or n == 0: 219 | return [] 220 | if not type(dt) == type(gt): 221 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 222 | 223 | # define local variables 224 | cdef double* _iou = 0 225 | cdef np.npy_intp shape[1] 226 | # check type and assign iou function 227 | if type(dt) == RLEs: 228 | _iouFun = _rleIou 229 | elif type(dt) == np.ndarray: 230 | _iouFun = _bbIou 231 | else: 232 | raise Exception('input data type not allowed.') 233 | _iou = malloc(m*n* sizeof(double)) 234 | iou = np.zeros((m*n, ), dtype=np.double) 235 | shape[0] = m*n 236 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 237 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 238 | _iouFun(dt, gt, iscrowd, m, n, iou) 239 | return iou.reshape((m,n), order='F') 240 | 241 | def toBbox( rleObjs ): 242 | cdef RLEs Rs = _frString(rleObjs) 243 | cdef siz n = Rs.n 244 | cdef BB _bb = malloc(4*n* sizeof(double)) 245 | rleToBbox( Rs._R, _bb, n ) 246 | cdef np.npy_intp shape[1] 247 | shape[0] = 4*n 248 | bb = np.array((1,4*n), dtype=np.double) 249 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 250 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 251 | return bb 252 | 253 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 254 | cdef siz n = bb.shape[0] 255 | Rs = RLEs(n) 256 | rleFrBbox( Rs._R, bb.data, h, w, n ) 257 | objs = _toString(Rs) 258 | return objs 259 | 260 | def frPoly( poly, siz h, siz w ): 261 | cdef np.ndarray[np.double_t, ndim=1] np_poly 262 | n = len(poly) 263 | Rs = RLEs(n) 264 | for i, p in enumerate(poly): 265 | np_poly = np.array(p, dtype=np.double, order='F') 266 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 267 | objs = _toString(Rs) 268 | return objs 269 | 270 | def frUncompressedRLE(ucRles, siz h, siz w): 271 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 272 | cdef RLE R 273 | cdef uint *data 274 | n = len(ucRles) 275 | objs = [] 276 | for i in range(n): 277 | Rs = RLEs(1) 278 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 279 | # time for malloc can be saved here but it's fine 280 | data = malloc(len(cnts)* sizeof(uint)) 281 | for j in range(len(cnts)): 282 | data[j] = cnts[j] 283 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 284 | Rs._R[0] = R 285 | objs.append(_toString(Rs)[0]) 286 | return objs 287 | 288 | def frPyObjects(pyobj, h, w): 289 | # encode rle from a list of python objects 290 | if type(pyobj) == np.ndarray: 291 | objs = frBbox(pyobj, h, w) 292 | elif type(pyobj) == 
list and len(pyobj[0]) == 4: 293 | objs = frBbox(pyobj, h, w) 294 | elif type(pyobj) == list and len(pyobj[0]) > 4: 295 | objs = frPoly(pyobj, h, w) 296 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 297 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 298 | objs = frUncompressedRLE(pyobj, h, w) 299 | # encode rle from single python object 300 | elif type(pyobj) == list and len(pyobj) == 4: 301 | objs = frBbox([pyobj], h, w)[0] 302 | elif type(pyobj) == list and len(pyobj) > 4: 303 | objs = frPoly([pyobj], h, w)[0] 304 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 305 | objs = frUncompressedRLE([pyobj], h, w)[0] 306 | else: 307 | raise Exception('input type is not supported.') 308 | return objs 309 | -------------------------------------------------------------------------------- /src_ortho/datasets/pycocotools/_mask.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/pycocotools/_mask.so -------------------------------------------------------------------------------- /src_ortho/datasets/pycocotools/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/pycocotools/coco.pyc -------------------------------------------------------------------------------- /src_ortho/datasets/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 
34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /src_ortho/datasets/pycocotools/mask.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/datasets/pycocotools/mask.pyc -------------------------------------------------------------------------------- /src_ortho/datasets/smpl_to_tfrecords.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert MoCap SMPL data to tfrecords. 
3 | """ 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | from os import makedirs 9 | from os.path import join, exists 10 | import numpy as np 11 | from glob import glob 12 | import cPickle as pickle 13 | 14 | import tensorflow as tf 15 | import cv2 16 | 17 | from .common import float_feature 18 | 19 | tf.app.flags.DEFINE_string( 20 | 'dataset_name', 'neutrSMPL_CMU', 21 | 'neutrSMPL_CMU, neutrSMPL_H3.6, or neutrSMPL_jointLim') 22 | tf.app.flags.DEFINE_string('data_directory', 23 | '/scratch1/hmr_multiview/neutrMosh', 24 | 'data directory where SMPL npz/pkl lies') 25 | tf.app.flags.DEFINE_string('output_directory', 26 | '/scratch1/hmr_multiview/tmp/mocap_neutrMosh/', 27 | 'Output data directory') 28 | 29 | tf.app.flags.DEFINE_integer('num_shards', 10000, 30 | 'Number of shards in TFRecord files.') 31 | 32 | FLAGS = tf.app.flags.FLAGS 33 | 34 | 35 | def convert_to_example(pose, shape=None): 36 | """Build an Example proto for an image example. 37 | Args: 38 | pose: 72-D vector, float 39 | shape: 10-D vector, float 40 | Returns: 41 | Example proto 42 | """ 43 | if shape is None: 44 | example = tf.train.Example(features=tf.train.Features( 45 | feature={ 46 | 'pose': float_feature(pose.astype(np.float)) 47 | })) 48 | else: 49 | example = tf.train.Example(features=tf.train.Features( 50 | feature={ 51 | 'pose': float_feature(pose.astype(np.float)), 52 | 'shape': float_feature(shape.astype(np.float)), 53 | })) 54 | 55 | return example 56 | 57 | 58 | def process_smpl_mocap(all_pkls, out_dir, num_shards, dataset_name): 59 | st = 0 60 | step = 1 61 | for pkl in all_pkls: 62 | with open(pkl, 'rb') as f: 63 | res = pickle.load(f) 64 | if 'poses' in res.keys(): 65 | tmp = res['poses'] 66 | else: 67 | tmp = res['new_poses'] 68 | rev = -tmp[0,:3] 69 | num_frame = 250 70 | print(tmp.shape) 71 | if tmp.shape[0] < num_frame * step: 72 | continue 73 | with open(join(out_dir, 'pose%04d.txt'%st), 'w') as f: 74 | for i in range(25): 75 | pre = tmp[0] / 25. * i 76 | pre[:3] = (0,0,0) 77 | for j in range(pre.shape[0]): 78 | f.write('{},'.format(pre[j])) 79 | f.write('\n') 80 | for i in range(num_frame): 81 | tmp[i*step,:3] = cv2.Rodrigues(np.matmul( 82 | cv2.Rodrigues(rev)[0], 83 | cv2.Rodrigues(tmp[i*step,:3])[0] 84 | ))[0][:,0] 85 | for j in range(tmp.shape[1]): 86 | f.write('{},'.format(tmp[i*step,j])) 87 | f.write('\n') 88 | st += 1 89 | # all_poses, all_shapes, all_shapes_unique = [], [], [] 90 | # for pkl in all_pkls: 91 | # with open(pkl, 'rb') as f: 92 | # res = pickle.load(f) 93 | # if 'poses' in res.keys(): 94 | # all_poses.append(res['poses']) 95 | # num_poses_here = res['poses'].shape[0] 96 | # else: 97 | # all_poses.append(res['new_poses']) 98 | # num_poses_here = res['new_poses'].shape[0] 99 | # all_shapes.append( 100 | # np.tile(np.reshape(res['betas'], (10, 1)), num_poses_here)) 101 | # all_shapes_unique.append(res['betas']) 102 | 103 | # all_poses = np.vstack(all_poses) 104 | # all_shapes = np.hstack(all_shapes).T 105 | 106 | # out_path = join(out_dir, '%s_%%03d.tfrecord' % dataset_name) 107 | 108 | # # shuffle results 109 | # num_mocap = all_poses.shape[0] 110 | # shuffle_id = np.random.permutation(num_mocap) 111 | # all_poses = all_poses[shuffle_id] 112 | # all_shapes = all_shapes[shuffle_id] 113 | 114 | # i = 0 115 | # fidx = 0 116 | # while i < num_mocap: 117 | # # Open new TFRecord file. 
118 | # tf_filename = out_path % fidx 119 | # print('Starting tfrecord file %s' % tf_filename) 120 | # with tf.python_io.TFRecordWriter(tf_filename) as writer: 121 | # j = 0 122 | # while i < num_mocap and j < num_shards: 123 | # if i % 10000 == 0: 124 | # print('Converting mosh %d/%d' % (i, num_mocap)) 125 | # example = convert_to_example(all_poses[i], shape=all_shapes[i]) 126 | # writer.write(example.SerializeToString()) 127 | # i += 1 128 | # j += 1 129 | 130 | # fidx += 1 131 | 132 | 133 | def main(unused_argv): 134 | data_dir = join(FLAGS.data_directory, FLAGS.dataset_name) 135 | # Ignore H3.6M test subjects!! 136 | all_pkl = sorted([ 137 | f for f in glob(join(data_dir, '*/*.pkl')) 138 | if 'S9' not in f and 'S11' not in f 139 | ]) 140 | if len(all_pkl) == 0: 141 | print('Something is wrong with the path bc I cant find any pkls!') 142 | import ipdb; ipdb.set_trace() 143 | 144 | print('Saving results to %s' % FLAGS.output_directory) 145 | 146 | if not exists(FLAGS.output_directory): 147 | makedirs(FLAGS.output_directory) 148 | 149 | all_pkl = ['/scratch1/hmr_multiview/neutrMosh/neutrSMPL_CMU/08/08_04.pkl'] 150 | process_smpl_mocap(all_pkl, FLAGS.output_directory, FLAGS.num_shards, 151 | FLAGS.dataset_name) 152 | 153 | 154 | if __name__ == '__main__': 155 | tf.app.run() 156 | -------------------------------------------------------------------------------- /src_ortho/main.py: -------------------------------------------------------------------------------- 1 | """ Driver for train """ 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from .config import get_config, prepare_dirs, save_config 10 | from .data_loader import DataLoader 11 | from .trainer import HMRTrainer 12 | import os 13 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 14 | 15 | 16 | def main(config): 17 | prepare_dirs(config) 18 | 19 | # Load data on CPU 20 | with tf.device("/cpu:0"): 21 | data_loader = DataLoader(config) 22 | image_loader = data_loader.load() 23 | smpl_loader = data_loader.get_smpl_loader() 24 | 25 | trainer = HMRTrainer(config, image_loader, smpl_loader) 26 | save_config(config) 27 | trainer.train() 28 | 29 | 30 | if __name__ == '__main__': 31 | config = get_config() 32 | main(config) 33 | -------------------------------------------------------------------------------- /src_ortho/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines networks. 3 | 4 | @Encoder_resnet 5 | @Encoder_resnet_v1_101 6 | @Encoder_fc3_dropout 7 | 8 | @Discriminator_separable_rotations 9 | 10 | Helper: 11 | @get_encoder_fn_separate 12 | """ 13 | 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | 18 | import tensorflow as tf 19 | import tensorflow.contrib.slim as slim 20 | 21 | from tensorflow.contrib.layers.python.layers.initializers import variance_scaling_initializer 22 | 23 | 24 | def Encoder_resnet(x, is_training=True, weight_decay=0.001, reuse=False): 25 | """ 26 | Resnet v2-50 27 | Assumes input is [batch, height_in, width_in, channels]!! 
28 | Input: 29 | - x: N x H x W x 3 30 | - weight_decay: float 31 | - reuse: bool->True if test 32 | 33 | Outputs: 34 | - cam: N x 3 35 | - Pose vector: N x 72 36 | - Shape vector: N x 10 37 | - variables: tf variables 38 | """ 39 | from tensorflow.contrib.slim.python.slim.nets import resnet_v2 40 | with tf.name_scope("Encoder_resnet", [x]): 41 | with slim.arg_scope( 42 | resnet_v2.resnet_arg_scope(weight_decay=weight_decay)): 43 | net, end_points = resnet_v2.resnet_v2_50( 44 | x, 45 | num_classes=None, 46 | is_training=is_training, 47 | reuse=reuse, 48 | scope='resnet_v2_50') 49 | net = tf.squeeze(net, axis=[1, 2]) 50 | variables = tf.contrib.framework.get_variables('resnet_v2_50') 51 | return net, variables 52 | 53 | 54 | def Encoder_fc3_dropout(x, 55 | num_output=85, 56 | is_training=True, 57 | reuse=False, 58 | name="3D_module"): 59 | """ 60 | 3D inference module. 3 MLP layers (last is the output) 61 | With dropout on first 2. 62 | Input: 63 | - x: N x [|img_feat|, |3D_param|] 64 | - reuse: bool 65 | 66 | Outputs: 67 | - 3D params: N x num_output 68 | if orthogonal: 69 | either 85: (3 + 24*3 + 10) or 109 (3 + 24*4 + 10) for factored axis-angle representation 70 | if perspective: 71 | 86: (f, tx, ty, tz) + 24*3 + 10, or 110 for factored axis-angle. 72 | - variables: tf variables 73 | """ 74 | if reuse: 75 | print('Reuse is on!') 76 | with tf.variable_scope(name, reuse=reuse) as scope: 77 | net = slim.fully_connected(x, 1024, scope='fc1') 78 | net = slim.dropout(net, 0.5, is_training=is_training, scope='dropout1') 79 | net = slim.fully_connected(net, 1024, scope='fc2') 80 | net = slim.dropout(net, 0.5, is_training=is_training, scope='dropout2') 81 | small_xavier = variance_scaling_initializer( 82 | factor=.01, mode='FAN_AVG', uniform=True) 83 | net = slim.fully_connected( 84 | net, 85 | num_output, 86 | activation_fn=None, 87 | weights_initializer=small_xavier, 88 | scope='fc3') 89 | 90 | variables = tf.contrib.framework.get_variables(scope) 91 | return net, variables 92 | 93 | 94 | def get_encoder_fn_separate(model_type): 95 | """ 96 | Retrieves diff encoder fn for image and 3D 97 | """ 98 | encoder_fn = None 99 | threed_fn = None 100 | if 'resnet' in model_type: 101 | encoder_fn = Encoder_resnet 102 | else: 103 | print('Unknown encoder %s!' % model_type) 104 | exit(1) 105 | 106 | if 'fc3_dropout' in model_type: 107 | threed_fn = Encoder_fc3_dropout 108 | 109 | if encoder_fn is None or threed_fn is None: 110 | print('Dont know what encoder to use for %s' % model_type) 111 | import ipdb 112 | ipdb.set_trace() 113 | 114 | return encoder_fn, threed_fn 115 | 116 | 117 | def Discriminator_separable_rotations( 118 | poses, 119 | shapes, 120 | weight_decay, 121 | ): 122 | """ 123 | 23 Discriminators on each joint + 1 for all joints + 1 for shape. 124 | To share the params on rotations, this treats the 23 rotation matrices 125 | as a "vertical image": 126 | Do 1x1 conv, then send off to 23 independent classifiers. 127 | 128 | Input: 129 | - poses: N x 23 x 1 x 9, NHWC ALWAYS!! 130 | - shapes: N x 10 131 | - weight_decay: float 132 | 133 | Outputs: 134 | - prediction: N x (1+23) or N x (1+23+1) if do_joint is on. 
135 | - variables: tf variables 136 | """ 137 | data_format = "NHWC" 138 | with tf.name_scope("Discriminator_sep_rotations", [poses, shapes]): 139 | with tf.variable_scope("D") as scope: 140 | with slim.arg_scope( 141 | [slim.conv2d, slim.fully_connected], 142 | weights_regularizer=slim.l2_regularizer(weight_decay)): 143 | with slim.arg_scope([slim.conv2d], data_format=data_format): 144 | poses = slim.conv2d(poses, 32, [1, 1], scope='D_conv1') 145 | poses = slim.conv2d(poses, 32, [1, 1], scope='D_conv2') 146 | theta_out = [] 147 | for i in range(0, 23): 148 | theta_out.append( 149 | slim.fully_connected( 150 | poses[:, i, :, :], 151 | 1, 152 | activation_fn=None, 153 | scope="pose_out_j%d" % i)) 154 | theta_out_all = tf.squeeze(tf.stack(theta_out, axis=1)) 155 | 156 | # Do shape on it's own: 157 | shapes = slim.stack( 158 | shapes, 159 | slim.fully_connected, [10, 5], 160 | scope="shape_fc1") 161 | shape_out = slim.fully_connected( 162 | shapes, 1, activation_fn=None, scope="shape_final") 163 | """ Compute joint correlation prior!""" 164 | nz_feat = 1024 165 | poses_all = slim.flatten(poses, scope='vectorize') 166 | poses_all = slim.fully_connected( 167 | poses_all, nz_feat, scope="D_alljoints_fc1") 168 | poses_all = slim.fully_connected( 169 | poses_all, nz_feat, scope="D_alljoints_fc2") 170 | poses_all_out = slim.fully_connected( 171 | poses_all, 172 | 1, 173 | activation_fn=None, 174 | scope="D_alljoints_out") 175 | out = tf.concat([theta_out_all, 176 | poses_all_out, shape_out], 1) 177 | 178 | variables = tf.contrib.framework.get_variables(scope) 179 | return out, variables 180 | -------------------------------------------------------------------------------- /src_ortho/models.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/models.pyc -------------------------------------------------------------------------------- /src_ortho/ops.py: -------------------------------------------------------------------------------- 1 | """ 2 | TF util operations. 3 | """ 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import tensorflow as tf 10 | 11 | 12 | def keypoint_l1_loss(kp_gt, kp_pred, scale=1., name=None): 13 | """ 14 | computes: \Sum_i [0.5 * vis[i] * |kp_gt[i] - kp_pred[i]|] / (|vis|) 15 | Inputs: 16 | kp_gt : N x K x 3 17 | kp_pred: N x K x 2 18 | """ 19 | with tf.name_scope(name, "keypoint_l1_loss", [kp_gt, kp_pred]): 20 | kp_gt = tf.reshape(kp_gt, (-1, 3)) 21 | kp_pred = tf.reshape(kp_pred, (-1, 2)) 22 | 23 | vis = tf.expand_dims(tf.cast(kp_gt[:, 2], tf.float32), 1) 24 | res = tf.losses.absolute_difference(kp_gt[:, :2], kp_pred, weights=vis) 25 | return res 26 | 27 | 28 | def compute_3d_loss(params_pred, params_gt, has_gt3d): 29 | """ 30 | Computes the l2 loss between 3D params pred and gt for those data that has_gt3d is True. 
31 | Parameters to compute loss over: 32 | 3Djoints: 14*3 = 42 33 | rotations:(24*9)= 216 34 | shape: 10 35 | total input: 226 (gt SMPL params) or 42 (just joints) 36 | 37 | Inputs: 38 | params_pred: N x {226, 42} 39 | params_gt: N x {226, 42} 40 | # has_gt3d: (N,) bool 41 | has_gt3d: N x 1 tf.float32 of {0., 1.} 42 | """ 43 | with tf.name_scope("3d_loss", [params_pred, params_gt, has_gt3d]): 44 | weights = tf.expand_dims(tf.cast(has_gt3d, tf.float32), 1) 45 | res = tf.losses.mean_squared_error( 46 | params_gt, params_pred, weights=weights) * 0.5 47 | return res 48 | 49 | 50 | def align_by_pelvis(joints): 51 | """ 52 | Assumes joints is N x 14 x 3 in LSP order. 53 | Then hips are: [3, 2] 54 | Takes mid point of these points, then subtracts it. 55 | """ 56 | with tf.name_scope("align_by_pelvis", [joints]): 57 | left_id = 3 58 | right_id = 2 59 | pelvis = (joints[:, left_id, :] + joints[:, right_id, :]) / 2. 60 | return joints - tf.expand_dims(pelvis, axis=1) 61 | -------------------------------------------------------------------------------- /src_ortho/ops.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/ops.pyc -------------------------------------------------------------------------------- /src_ortho/tf_smpl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/tf_smpl/__init__.py -------------------------------------------------------------------------------- /src_ortho/tf_smpl/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/tf_smpl/__init__.pyc -------------------------------------------------------------------------------- /src_ortho/tf_smpl/batch_lbs.py: -------------------------------------------------------------------------------- 1 | """ Util functions for SMPL 2 | @@batch_skew 3 | @@batch_rodrigues 4 | @@batch_lrotmin 5 | @@batch_global_rigid_transformation 6 | """ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import tensorflow as tf 13 | 14 | 15 | def batch_skew(vec, batch_size=None): 16 | """ 17 | vec is N x 3, batch_size is int 18 | 19 | returns N x 3 x 3. Skew_sym version of each matrix. 
20 | """ 21 | with tf.name_scope("batch_skew", [vec]): 22 | if batch_size is None: 23 | batch_size = vec.shape.as_list()[0] 24 | col_inds = tf.constant([1, 2, 3, 5, 6, 7]) 25 | indices = tf.reshape( 26 | tf.reshape(tf.range(0, batch_size) * 9, [-1, 1]) + col_inds, 27 | [-1, 1]) 28 | updates = tf.reshape( 29 | tf.stack( 30 | [ 31 | -vec[:, 2], vec[:, 1], vec[:, 2], -vec[:, 0], -vec[:, 1], 32 | vec[:, 0] 33 | ], 34 | axis=1), [-1]) 35 | out_shape = [batch_size * 9] 36 | res = tf.scatter_nd(indices, updates, out_shape) 37 | res = tf.reshape(res, [batch_size, 3, 3]) 38 | 39 | return res 40 | 41 | 42 | def batch_rodrigues(theta, name=None): 43 | """ 44 | Theta is N x 3 45 | """ 46 | with tf.name_scope(name, "batch_rodrigues", [theta]): 47 | batch_size = theta.shape.as_list()[0] 48 | 49 | # angle = tf.norm(theta, axis=1) 50 | # r = tf.expand_dims(tf.div(theta, tf.expand_dims(angle + 1e-8, -1)), -1) 51 | # angle = tf.expand_dims(tf.norm(theta, axis=1) + 1e-8, -1) 52 | angle = tf.expand_dims(tf.norm(theta + 1e-8, axis=1), -1) 53 | r = tf.expand_dims(tf.div(theta, angle), -1) 54 | 55 | angle = tf.expand_dims(angle, -1) 56 | cos = tf.cos(angle) 57 | sin = tf.sin(angle) 58 | 59 | outer = tf.matmul(r, r, transpose_b=True, name="outer") 60 | 61 | eyes = tf.tile(tf.expand_dims(tf.eye(3), 0), [batch_size, 1, 1]) 62 | R = cos * eyes + (1 - cos) * outer + sin * batch_skew( 63 | r, batch_size=batch_size) 64 | return R 65 | 66 | def batch_rodrigues_back(rot, name=None): 67 | with tf.name_scope(name, "batch_rodrigues_back", [rot]): 68 | tmp = (rot - tf.matrix_transpose(rot)) / 2 69 | v = tf.gather(tf.reshape(tmp, [-1, 9]), [7,2,3], axis=1) 70 | sin = tf.expand_dims(tf.norm(v, axis=1), -1) 71 | cos = (tf.expand_dims(rot[:,0,0]+rot[:,1,1]+rot[:,2,2], -1) - 1) / 2 72 | angle = tf.atan2(sin, cos) 73 | v = v / sin * angle 74 | return v 75 | 76 | 77 | def batch_lrotmin(theta, name=None): 78 | """ NOTE: not used bc I want to reuse R and this is simple. 79 | Output of this is used to compute joint-to-pose blend shape mapping. 80 | Equation 9 in SMPL paper. 81 | 82 | 83 | Args: 84 | pose: `Tensor`, N x 72 vector holding the axis-angle rep of K joints. 85 | This includes the global rotation so K=24 86 | 87 | Returns 88 | diff_vec : `Tensor`: N x 207 rotation matrix of 23=(K-1) joints with identity subtracted., 89 | """ 90 | with tf.name_scope(name, "batch_lrotmin", [theta]): 91 | with tf.name_scope("ignore_global"): 92 | theta = theta[:, 3:] 93 | 94 | # N*23 x 3 x 3 95 | Rs = batch_rodrigues(tf.reshape(theta, [-1, 3])) 96 | lrotmin = tf.reshape(Rs - tf.eye(3), [-1, 207]) 97 | 98 | return lrotmin 99 | 100 | 101 | def batch_global_rigid_transformation(Rs, Js, parent, rotate_base=False): 102 | """ 103 | Computes absolute joint locations given pose. 104 | 105 | rotate_base: if True, rotates the global rotation by 90 deg in x axis. 106 | if False, this is the original SMPL coordinate. 107 | 108 | Args: 109 | Rs: N x 24 x 3 x 3 rotation vector of K joints 110 | Js: N x 24 x 3, joint locations before posing 111 | parent: 24 holding the parent id for each index 112 | 113 | Returns 114 | new_J : `Tensor`: N x 24 x 3 location of absolute joints 115 | A : `Tensor`: N x 24 4 x 4 relative joint transformations for LBS. 
116 | """ 117 | with tf.name_scope("batch_forward_kinematics", [Rs, Js]): 118 | N = Rs.shape[0].value 119 | if rotate_base: 120 | print('Flipping the SMPL coordinate frame!!!!') 121 | rot_x = tf.constant( 122 | [[1, 0, 0], [0, -1, 0], [0, 0, -1]], dtype=Rs.dtype) 123 | rot_x = tf.reshape(tf.tile(rot_x, [N, 1]), [N, 3, 3]) 124 | root_rotation = tf.matmul(Rs[:, 0, :, :], rot_x) 125 | else: 126 | root_rotation = Rs[:, 0, :, :] 127 | 128 | # Now Js is N x 24 x 3 x 1 129 | Js = tf.expand_dims(Js, -1) 130 | 131 | def make_A(R, t, name=None): 132 | # Rs is N x 3 x 3, ts is N x 3 x 1 133 | with tf.name_scope(name, "Make_A", [R, t]): 134 | R_homo = tf.pad(R, [[0, 0], [0, 1], [0, 0]]) 135 | t_homo = tf.concat([t, tf.ones([N, 1, 1])], 1) 136 | return tf.concat([R_homo, t_homo], 2) 137 | 138 | A0 = make_A(root_rotation, Js[:, 0]) 139 | results = [A0] 140 | for i in range(1, parent.shape[0]): 141 | j_here = Js[:, i] - Js[:, parent[i]] 142 | A_here = make_A(Rs[:, i], j_here) 143 | res_here = tf.matmul( 144 | results[parent[i]], A_here, name="propA%d" % i) 145 | results.append(res_here) 146 | 147 | # 10 x 24 x 4 x 4 148 | results = tf.stack(results, axis=1) 149 | 150 | new_J = results[:, :, :3, 3] 151 | 152 | # --- Compute relative A: Skinning is based on 153 | # how much the bone moved (not the final location of the bone) 154 | # but (final_bone - init_bone) 155 | # --- 156 | Js_w0 = tf.concat([Js, tf.zeros([N, 24, 1, 1])], 2) 157 | init_bone = tf.matmul(results, Js_w0) 158 | # Append empty 4 x 3: 159 | init_bone = tf.pad(init_bone, [[0, 0], [0, 0], [0, 0], [3, 0]]) 160 | A = results - init_bone 161 | 162 | return new_J, A 163 | -------------------------------------------------------------------------------- /src_ortho/tf_smpl/batch_lbs.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/tf_smpl/batch_lbs.pyc -------------------------------------------------------------------------------- /src_ortho/tf_smpl/batch_smpl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tensorflow SMPL implementation as batch. 3 | Specify joint types: 4 | 'coco': Returns COCO+ 19 joints 5 | 'lsp': Returns H3.6M-LSP 14 joints 6 | Note: To get original smpl joints, use self.J_transformed 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import cPickle as pickle 15 | 16 | import tensorflow as tf 17 | from .batch_lbs import batch_rodrigues, batch_global_rigid_transformation 18 | 19 | 20 | # There are chumpy variables so convert them to numpy. 
21 | def undo_chumpy(x): 22 | return x if isinstance(x, np.ndarray) else x.r 23 | 24 | 25 | class SMPL(object): 26 | def __init__(self, pkl_path, joint_type='lsp', dtype=tf.float32): 27 | """ 28 | pkl_path is the path to a SMPL model 29 | """ 30 | # -- Load SMPL params -- 31 | mdl = '/scratch1/smplify_public/code/models/basicModel_neutral_lbs_10_207_0_v1.0.0.pkl' 32 | with open(pkl_path, 'r') as f: 33 | dd = pickle.load(f) 34 | # Mean template vertices 35 | self.v_template = tf.Variable( 36 | undo_chumpy(dd['v_template']), 37 | name='v_template', 38 | dtype=dtype, 39 | trainable=False) 40 | # Size of mesh [Number of vertices, 3] 41 | self.size = [self.v_template.shape[0].value, 3] 42 | self.num_betas = dd['shapedirs'].shape[-1] 43 | # Shape blend shape basis: 6980 x 3 x 10 44 | # reshaped to 6980*30 x 10, transposed to 10x6980*3 45 | shapedir = np.reshape( 46 | undo_chumpy(dd['shapedirs']), [-1, self.num_betas]).T 47 | self.shapedirs = tf.Variable( 48 | shapedir, name='shapedirs', dtype=dtype, trainable=False) 49 | 50 | # Regressor for joint locations given shape - 6890 x 24 51 | self.J_regressor = tf.Variable( 52 | dd['J_regressor'].T.todense(), 53 | name="J_regressor", 54 | dtype=dtype, 55 | trainable=False) 56 | 57 | # Pose blend shape basis: 6890 x 3 x 207, reshaped to 6890*30 x 207 58 | num_pose_basis = dd['posedirs'].shape[-1] 59 | # 207 x 20670 60 | posedirs = np.reshape( 61 | undo_chumpy(dd['posedirs']), [-1, num_pose_basis]).T 62 | self.posedirs = tf.Variable( 63 | posedirs, name='posedirs', dtype=dtype, trainable=False) 64 | 65 | # indices of parents for each joints 66 | self.parents = dd['kintree_table'][0].astype(np.int32) 67 | 68 | # LBS weights 69 | self.weights = tf.Variable( 70 | undo_chumpy(dd['weights']), 71 | name='lbs_weights', 72 | dtype=dtype, 73 | trainable=False) 74 | 75 | # This returns 19 keypoints: 6890 x 19 76 | with open(pkl_path, 'r') as f: 77 | dd = pickle.load(f) 78 | self.joint_regressor = tf.Variable( 79 | dd['cocoplus_regressor'].T.todense(), 80 | name="cocoplus_regressor", 81 | dtype=dtype, 82 | trainable=False) 83 | if joint_type == 'lsp': # 14 LSP joints! 84 | self.joint_regressor = self.joint_regressor[:, :14] 85 | 86 | if joint_type not in ['cocoplus', 'lsp']: 87 | print('BAD!! Unknown joint type: %s, it must be either "cocoplus" or "lsp"' % joint_type) 88 | import ipdb 89 | ipdb.set_trace() 90 | 91 | def __call__(self, beta, theta, get_skin=False, name=None): 92 | """ 93 | Obtain SMPL with shape (beta) & pose (theta) inputs. 94 | Theta includes the global rotation. 95 | Args: 96 | beta: N x 10 97 | theta: N x 72 (with 3-D axis-angle rep) 98 | 99 | Updates: 100 | self.J_transformed: N x 24 x 3 joint location after shaping 101 | & posing with beta and theta 102 | Returns: 103 | - joints: N x 19 or 14 x 3 joint locations depending on joint_type 104 | If get_skin is True, also returns 105 | - Verts: N x 6980 x 3 106 | """ 107 | 108 | with tf.name_scope(name, "smpl_main", [beta, theta]): 109 | num_batch = beta.shape[0].value 110 | 111 | # 1. Add shape blend shapes 112 | # (N x 10) x (10 x 6890*3) = N x 6890 x 3 113 | v_shaped = tf.reshape( 114 | tf.matmul(beta, self.shapedirs, name='shape_bs'), 115 | [-1, self.size[0], self.size[1]]) + self.v_template 116 | 117 | # 2. Infer shape-dependent joint locations. 118 | Jx = tf.matmul(v_shaped[:, :, 0], self.J_regressor) 119 | Jy = tf.matmul(v_shaped[:, :, 1], self.J_regressor) 120 | Jz = tf.matmul(v_shaped[:, :, 2], self.J_regressor) 121 | J = tf.stack([Jx, Jy, Jz], axis=2) 122 | 123 | # 3. 
Add pose blend shapes 124 | # N x 24 x 3 x 3 125 | Rs = tf.reshape( 126 | batch_rodrigues(tf.reshape(theta, [-1, 3])), [-1, 24, 3, 3]) 127 | with tf.name_scope("lrotmin"): 128 | # Ignore global rotation. 129 | pose_feature = tf.reshape(Rs[:, 1:, :, :] - tf.eye(3), 130 | [-1, 207]) 131 | 132 | # (N x 207) x (207, 20670) -> N x 6890 x 3 133 | v_posed = tf.reshape( 134 | tf.matmul(pose_feature, self.posedirs), 135 | [-1, self.size[0], self.size[1]]) + v_shaped 136 | 137 | #4. Get the global joint location 138 | self.J_transformed, A = batch_global_rigid_transformation(Rs, J, self.parents) 139 | 140 | # 5. Do skinning: 141 | # W is N x 6890 x 24 142 | W = tf.reshape( 143 | tf.tile(self.weights, [num_batch, 1]), [num_batch, -1, 24]) 144 | # (N x 6890 x 24) x (N x 24 x 16) 145 | T = tf.reshape( 146 | tf.matmul(W, tf.reshape(A, [num_batch, 24, 16])), 147 | [num_batch, -1, 4, 4]) 148 | v_posed_homo = tf.concat( 149 | [v_posed, tf.ones([num_batch, v_posed.shape[1], 1])], 2) 150 | v_homo = tf.matmul(T, tf.expand_dims(v_posed_homo, -1)) 151 | 152 | verts = v_homo[:, :, :3, 0] 153 | 154 | # Get cocoplus or lsp joints: 155 | joint_x = tf.matmul(verts[:, :, 0], self.joint_regressor) 156 | joint_y = tf.matmul(verts[:, :, 1], self.joint_regressor) 157 | joint_z = tf.matmul(verts[:, :, 2], self.joint_regressor) 158 | joints = tf.stack([joint_x, joint_y, joint_z], axis=2) 159 | 160 | if get_skin: 161 | return verts, joints, Rs 162 | else: 163 | return joints 164 | 165 | 166 | -------------------------------------------------------------------------------- /src_ortho/tf_smpl/batch_smpl.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/tf_smpl/batch_smpl.pyc -------------------------------------------------------------------------------- /src_ortho/tf_smpl/projection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Util functions implementing the camera 3 | 4 | @@batch_orth_proj_idrot 5 | """ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | 13 | def batch_orth_proj_idrot(X, camera, name=None): 14 | """ 15 | X is N x num_points x 3 16 | camera is N x 3 17 | same as applying orth_proj_idrot to each N 18 | """ 19 | with tf.name_scope(name, "batch_orth_proj_idrot", [X, camera]): 20 | # TODO check X dim size. 
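        # The camera is a weak-perspective model with identity rotation:
        # camera = [s, tx, ty] per example, and each point (x, y, z) maps to
        # (u, v) = s * (x + tx, y + ty); depth is dropped.
        # Equivalent NumPy sketch (for reference only):
        #   proj = camera[:, None, 0:1] * (X[:, :, :2] + camera[:, None, 1:3])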
21 | # tf.Assert(X.shape[2] == 3, [X]) 22 | 23 | camera = tf.reshape(camera, [-1, 1, 3], name="cam_adj_shape") 24 | 25 | X_trans = X[:, :, :2] + camera[:, :, 1:] 26 | 27 | shape = tf.shape(X_trans) 28 | return tf.reshape( 29 | camera[:, :, 0] * tf.reshape(X_trans, [shape[0], -1]), shape) 30 | -------------------------------------------------------------------------------- /src_ortho/tf_smpl/projection.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/tf_smpl/projection.pyc -------------------------------------------------------------------------------- /src_ortho/tf_smpl/smpl_faces.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/tf_smpl/smpl_faces.npy -------------------------------------------------------------------------------- /src_ortho/trainer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/trainer.pyc -------------------------------------------------------------------------------- /src_ortho/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/util/__init__.py -------------------------------------------------------------------------------- /src_ortho/util/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/util/__init__.pyc -------------------------------------------------------------------------------- /src_ortho/util/data_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utils for data loading for training. 3 | """ 4 | 5 | from os.path import join 6 | from glob import glob 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | 11 | def parse_example_proto(example_serialized, num_view, has_3d=True): 12 | """Parses an Example proto. 
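    Each serialized Example stores all num_view views of the same subject, so the
    image-level features below carry a leading num_view dimension.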
13 | It's contents are: 14 | 15 | 'image/height' : _int64_feature(height), 16 | 'image/width' : _int64_feature(width), 17 | 'image/x' : _float_feature(label[0,:].astype(np.float)), 18 | 'image/y' : _float_feature(label[1,:].astype(np.float)), 19 | 'image/visibility' : _int64_feature(label[2,:].astype(np.int)), 20 | 'image/format' : _bytes_feature 21 | 'image/filename' : _bytes_feature 22 | 'image/encoded' : _bytes_feature 23 | 'image/face_points' : _float_feature, 24 | this is the 2D keypoints of the face points in coco 5*3 (x,y,vis) = 15 25 | 26 | if has_3d is on, it also has: 27 | 'mosh/pose' : float_feature(pose.astype(np.float)), 28 | 'mosh/shape' : float_feature(shape.astype(np.float)), 29 | # gt3d is 14x3 30 | 'mosh/gt3d' : float_feature(shape.astype(np.float)), 31 | """ 32 | feature_map = { 33 | 'image/encoded': 34 | tf.FixedLenFeature([num_view], dtype=tf.string, default_value=['' for _ in range(num_view)]), 35 | 'image/height': 36 | tf.FixedLenFeature([num_view, 1], dtype=tf.int64, default_value=[-1 for _ in range(num_view)]), 37 | 'image/width': 38 | tf.FixedLenFeature([num_view, 1], dtype=tf.int64, default_value=[-1 for _ in range(num_view)]), 39 | 'image/filename': 40 | tf.FixedLenFeature([num_view], dtype=tf.string, default_value=['' for _ in range(num_view)]), 41 | 'image/center': 42 | tf.FixedLenFeature((num_view, 2, 1), dtype=tf.int64), 43 | 'image/visibility': 44 | tf.FixedLenFeature((num_view, 1, 14), dtype=tf.int64), 45 | 'image/x': 46 | tf.FixedLenFeature((num_view, 1, 14), dtype=tf.float32), 47 | 'image/y': 48 | tf.FixedLenFeature((num_view, 1, 14), dtype=tf.float32), 49 | 'image/face_pts': 50 | tf.FixedLenFeature((num_view,3,5),dtype=tf.float32,default_value=np.zeros((num_view,3,5))), 51 | 'meta/has_3djoint': # only for differing coco and h36m 52 | tf.FixedLenFeature([1],dtype=tf.int64, default_value=True), 53 | } 54 | if num_view == 4: 55 | num_3d = 2 56 | else: 57 | num_3d = 1 58 | if has_3d: 59 | feature_map.update({ 60 | 'mosh/pose': 61 | tf.FixedLenFeature((num_view, 72), dtype=tf.float32,default_value=np.zeros((num_view,72))), 62 | 'mosh/shape': 63 | tf.FixedLenFeature((num_view, 10), dtype=tf.float32,default_value=np.zeros((num_view,10))), 64 | 'mosh/gt3d': 65 | tf.FixedLenFeature((num_view, 14 * 3), dtype=tf.float32,default_value=np.zeros((num_view,14*3))), 66 | # has_3d is for pose and shape: 0 for mpi_inf_3dhp, 1 for h3.6m. 
67 | # has_3d = [has_3d_joint, has_3d_smpl] 68 | 'meta/has_3d': 69 | tf.FixedLenFeature((num_3d,), dtype=tf.int64, default_value=[0 for _ in range(num_3d)]), 70 | }) 71 | 72 | features = tf.parse_single_example(example_serialized, feature_map) 73 | 74 | height = tf.cast(features['image/height'], dtype=tf.int32) 75 | width = tf.cast(features['image/width'], dtype=tf.int32) 76 | center = tf.cast(features['image/center'], dtype=tf.int32) 77 | fname = tf.cast(features['image/filename'], dtype=tf.string) 78 | fname = tf.Print(fname, [fname], message="image name: ") 79 | 80 | vis = tf.cast(features['image/visibility'], dtype=tf.float32) 81 | x = tf.cast(features['image/x'], dtype=tf.float32) 82 | y = tf.cast(features['image/y'], dtype=tf.float32) 83 | face = tf.cast(features['image/face_pts'], dtype=tf.float32) 84 | 85 | label = tf.concat([x, y, vis], 1) 86 | label = tf.concat([label,face], 2) 87 | image = [decode_jpeg(features['image/encoded'][i]) for i in range(num_view)] 88 | image_size = tf.concat([height, width], 1) 89 | has_3djoint = tf.cast(features['meta/has_3djoint'],dtype=tf.bool) 90 | 91 | if has_3d: 92 | pose = tf.cast(features['mosh/pose'], dtype=tf.float32) 93 | shape = tf.cast(features['mosh/shape'], dtype=tf.float32) 94 | gt3d = tf.reshape( 95 | tf.cast(features['mosh/gt3d'], dtype=tf.float32), [num_view, 14, 3]) 96 | has_smpl3d = tf.cast(features['meta/has_3d'], dtype=tf.bool) 97 | return image, image_size, label, center, fname, pose, shape, gt3d, has_smpl3d, has_3djoint 98 | else: 99 | return image, image_size, label, center, fname 100 | 101 | 102 | def rescale_image(image): 103 | """ 104 | Rescales image from [0, 1] to [-1, 1] 105 | Resnet v2 style preprocessing. 106 | """ 107 | # convert to [0, 1]. 108 | image = tf.subtract(image, 0.5) 109 | image = tf.multiply(image, 2.0) 110 | return image 111 | 112 | 113 | def get_all_files(dataset_dir, datasets, split='train'): 114 | # Dataset with different name path 115 | diff_name = ['h36m', 'mpi_inf_3dhp', 'synthetic'] 116 | 117 | data_dirs = [ 118 | join(dataset_dir, dataset, '%s_*.tfrecord' % split) 119 | for dataset in datasets if dataset not in diff_name 120 | ] 121 | if 'h36m' in datasets: 122 | data_dirs.append( 123 | join(dataset_dir, 'human36m_wjoints', split, 124 | '*.tfrecord')) 125 | if 'mpi_inf_3dhp' in datasets: 126 | data_dirs.append( 127 | join(dataset_dir, 'mpi_inf_3dhp', split, '*.tfrecord')) 128 | if 'synthetic' in datasets: 129 | data_dirs.append( 130 | join(dataset_dir, 'synthetic', split, '*.tfrecord')) 131 | 132 | all_files = [] 133 | for data_dir in data_dirs: 134 | all_files += sorted(glob(data_dir)) 135 | return all_files 136 | 137 | 138 | def read_smpl_data(filename_queue): 139 | """ 140 | Parses a smpl Example proto. 141 | It's contents are: 142 | 'pose' : 72-D float 143 | 'shape' : 10-D float 144 | """ 145 | with tf.name_scope(None, 'read_smpl_data', [filename_queue]): 146 | reader = tf.TFRecordReader() 147 | _, example_serialized = reader.read(filename_queue) 148 | 149 | feature_map = { 150 | 'pose': tf.FixedLenFeature((72, ), dtype=tf.float32), 151 | 'shape': tf.FixedLenFeature((10, ), dtype=tf.float32) 152 | } 153 | 154 | features = tf.parse_single_example(example_serialized, feature_map) 155 | pose = tf.cast(features['pose'], dtype=tf.float32) 156 | shape = tf.cast(features['shape'], dtype=tf.float32) 157 | 158 | return pose, shape 159 | 160 | 161 | def decode_jpeg(image_buffer, name=None): 162 | """Decode a JPEG string into one 3-D float image Tensor. 
163 | Args: 164 | image_buffer: scalar string Tensor. 165 | name: Optional name for name_scope. 166 | Returns: 167 | 3-D float Tensor with values ranging from [0, 1). 168 | """ 169 | with tf.name_scope(name, 'decode_jpeg', [image_buffer]): 170 | # Decode the string as an RGB JPEG. 171 | # Note that the resulting image contains an unknown height and width 172 | # that is set dynamically by decode_jpeg. In other words, the height 173 | # and width of image is unknown at compile-time. 174 | image = tf.image.decode_jpeg(image_buffer, channels=3) 175 | 176 | # convert to [0, 1]. 177 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 178 | return image 179 | 180 | 181 | def jitter_center(center, trans_max): 182 | with tf.name_scope(None, 'jitter_center', [center, trans_max]): 183 | rand_trans = tf.random_uniform( 184 | [2, 1], minval=-trans_max, maxval=trans_max, dtype=tf.int32) 185 | return center + rand_trans 186 | 187 | 188 | def jitter_scale(image, image_size, keypoints, center, scale_range, output_size, scale_factor=None): 189 | with tf.name_scope(None, 'jitter_scale', [image, image_size, keypoints]): 190 | if scale_factor is None: 191 | scale_factor = tf.random_uniform( 192 | [1], 193 | minval=scale_range[0], 194 | maxval=scale_range[1], 195 | dtype=tf.float32) 196 | new_size = tf.to_int32(tf.to_float(image_size) * scale_factor) 197 | # new_size = tf.maximum(new_size, tf.constant([output_size, output_size])) 198 | new_image = tf.image.resize_images(image, new_size) 199 | 200 | # This is [height, width] -> [y, x] -> [col, row] 201 | actual_factor = tf.to_float( 202 | tf.shape(new_image)[:2]) / tf.to_float(image_size) 203 | x = keypoints[0, :] * actual_factor[1] 204 | y = keypoints[1, :] * actual_factor[0] 205 | 206 | cx = tf.cast(center[0], actual_factor.dtype) * actual_factor[1] 207 | cy = tf.cast(center[1], actual_factor.dtype) * actual_factor[0] 208 | 209 | return new_image, tf.stack([x, y]), tf.cast( 210 | tf.stack([cx, cy]), tf.int32) 211 | 212 | 213 | def pad_image_edge(image, margin): 214 | """ Pads image in each dimension by margin, in numpy: 215 | image_pad = np.pad(image, 216 | ((margin, margin), 217 | (margin, margin), (0, 0)), mode='edge') 218 | tf doesn't have edge repeat mode,, so doing it with tile 219 | Assumes image has 3 channels!! 220 | """ 221 | 222 | def repeat_col(col, num_repeat): 223 | # col is N x 3, ravels 224 | # i.e. to N*3 and repeats, then put it back to num_repeat x N x 3 225 | with tf.name_scope(None, 'repeat_col', [col, num_repeat]): 226 | return tf.reshape( 227 | tf.tile(tf.reshape(col, [-1]), [num_repeat]), 228 | [num_repeat, -1, 3]) 229 | 230 | with tf.name_scope(None, 'pad_image_edge', [image, margin]): 231 | top = repeat_col(image[0, :, :], margin) 232 | bottom = repeat_col(image[-1, :, :], margin) 233 | 234 | image = tf.concat([top, image, bottom], 0) 235 | # Left requires another permute bc how img[:, 0, :]->(h, 3) 236 | left = tf.transpose(repeat_col(image[:, 0, :], margin), perm=[1, 0, 2]) 237 | right = tf.transpose( 238 | repeat_col(image[:, -1, :], margin), perm=[1, 0, 2]) 239 | image = tf.concat([left, image, right], 1) 240 | 241 | return image 242 | 243 | 244 | def random_flip(image, kp, pose=None, gt3d=None): 245 | """ 246 | mirrors image L/R and kp, also pose if supplied 247 | """ 248 | 249 | uniform_random = tf.random_uniform([], 0, 1.0) 250 | mirror_cond = tf.less(uniform_random, -1.) 
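    # uniform_random lies in [0, 1), so comparing it against -1 makes mirror_cond
    # always False: L/R flipping is intentionally disabled here, presumably because
    # mirroring a single view independently would break cross-view consistency.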
# block flip for multi view 251 | 252 | if pose is not None: 253 | new_image, new_kp, new_pose, new_gt3d = tf.cond( 254 | mirror_cond, lambda: flip_image(image, kp, pose, gt3d), 255 | lambda: (image, kp, pose, gt3d)) 256 | return new_image, new_kp, new_pose, new_gt3d 257 | else: 258 | new_image, new_kp = tf.cond(mirror_cond, lambda: flip_image(image, kp), 259 | lambda: (image, kp)) 260 | return new_image, new_kp 261 | 262 | 263 | def flip_image(image, kp, pose=None, gt3d=None): 264 | """ 265 | Flipping image and kp. 266 | kp is 3 x N! 267 | pose is 72D 268 | gt3d is 14 x 3 269 | """ 270 | image = tf.reverse(image, [1]) 271 | new_kp = kp 272 | 273 | new_x = tf.cast(tf.shape(image)[0], dtype=kp.dtype) - kp[0, :] - 1 274 | new_kp = tf.concat([tf.expand_dims(new_x, 0), kp[1:, :]], 0) 275 | # Swap left and right limbs by gathering them in the right order 276 | # For COCO+ 277 | swap_inds = tf.constant( 278 | [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 16, 15, 18, 17]) 279 | new_kp = tf.transpose(tf.gather(tf.transpose(new_kp), swap_inds)) 280 | 281 | if pose is not None: 282 | new_pose = reflect_pose(pose) 283 | new_gt3d = reflect_joints3d(gt3d) 284 | return image, new_kp, new_pose, new_gt3d 285 | else: 286 | return image, new_kp 287 | 288 | 289 | def reflect_pose(pose): 290 | """ 291 | Input is a 72-Dim vector. 292 | Global rotation (first 3) is left alone. 293 | """ 294 | with tf.name_scope("reflect_pose", [pose]): 295 | """ 296 | # How I got the indices: 297 | right = [11, 8, 5, 2, 14, 17, 19, 21, 23] 298 | left = [10, 7, 4, 1, 13, 16, 18, 20, 22] 299 | new_map = {} 300 | for r_id, l_id in zip(right, left): 301 | for axis in range(0, 3): 302 | rind = r_id * 3 + axis 303 | lind = l_id * 3 + axis 304 | new_map[rind] = lind 305 | new_map[lind] = rind 306 | asis = [id for id in np.arange(0, 24) if id not in right + left] 307 | for a_id in asis: 308 | for axis in range(0, 3): 309 | aind = a_id * 3 + axis 310 | new_map[aind] = aind 311 | swap_inds = np.array([new_map[k] for k in sorted(new_map.keys())]) 312 | """ 313 | swap_inds = tf.constant([ 314 | 0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11, 15, 16, 17, 12, 13, 14, 18, 315 | 19, 20, 24, 25, 26, 21, 22, 23, 27, 28, 29, 33, 34, 35, 30, 31, 32, 316 | 36, 37, 38, 42, 43, 44, 39, 40, 41, 45, 46, 47, 51, 52, 53, 48, 49, 317 | 50, 57, 58, 59, 54, 55, 56, 63, 64, 65, 60, 61, 62, 69, 70, 71, 66, 318 | 67, 68 319 | ], tf.int32) 320 | 321 | # sign_flip = np.tile([1, -1, -1], (24)) (with the first 3 kept) 322 | sign_flip = tf.constant( 323 | [ 324 | 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, 325 | -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, 326 | -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 327 | 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, 328 | -1, 1, -1, -1 329 | ], 330 | dtype=pose.dtype) 331 | 332 | new_pose = tf.gather(pose, swap_inds) * sign_flip 333 | 334 | return new_pose 335 | 336 | 337 | def reflect_joints3d(joints): 338 | """ 339 | Assumes input is 14 x 3 (the LSP skeleton subset of H3.6M) 340 | """ 341 | swap_inds = tf.constant([5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13]) 342 | with tf.name_scope("reflect_joints3d", [joints]): 343 | joints_ref = tf.gather(joints, swap_inds) 344 | flip_mat = tf.constant([[-1, 0, 0], [0, 1, 0], [0, 0, 1]], tf.float32) 345 | joints_ref = tf.transpose( 346 | tf.matmul(flip_mat, joints_ref, transpose_b=True)) 347 | # Assumes all joints3d are mean subtracted 348 | joints_ref = joints_ref - tf.reduce_mean(joints_ref, axis=0) 349 | return 
joints_ref 350 | -------------------------------------------------------------------------------- /src_ortho/util/data_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/util/data_utils.pyc -------------------------------------------------------------------------------- /src_ortho/util/image.py: -------------------------------------------------------------------------------- 1 | """ 2 | Preprocessing stuff. 3 | """ 4 | import numpy as np 5 | import cv2 6 | 7 | 8 | def resize_img(img, scale_factor): 9 | new_size = (np.floor(np.array(img.shape[0:2]) * scale_factor)).astype(int) 10 | new_img = cv2.resize(img, (new_size[1], new_size[0])) 11 | # This is scale factor of [height, width] i.e. [y, x] 12 | actual_factor = [ 13 | new_size[0] / float(img.shape[0]), new_size[1] / float(img.shape[1]) 14 | ] 15 | return new_img, actual_factor 16 | 17 | 18 | def scale_and_crop(image, scale, center, img_size): 19 | image_scaled, scale_factors = resize_img(image, scale) 20 | # Swap so it's [x, y] 21 | scale_factors = [scale_factors[1], scale_factors[0]] 22 | center_scaled = np.round(center * scale_factors).astype(np.int) 23 | 24 | margin = int(img_size / 2) 25 | image_pad = np.pad( 26 | image_scaled, ((margin, ), (margin, ), (0, )), mode='edge') 27 | center_pad = center_scaled + margin 28 | # figure out starting point 29 | start_pt = center_pad - margin 30 | end_pt = center_pad + margin 31 | # crop: 32 | crop = image_pad[start_pt[1]:end_pt[1], start_pt[0]:end_pt[0], :] 33 | proc_param = { 34 | 'scale': scale, 35 | 'start_pt': start_pt, 36 | 'end_pt': end_pt, 37 | 'img_size': img_size 38 | } 39 | 40 | return crop, proc_param 41 | -------------------------------------------------------------------------------- /src_ortho/util/image.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/util/image.pyc -------------------------------------------------------------------------------- /src_ortho/util/openpose.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to convert openpose output into bbox 3 | """ 4 | import json 5 | import numpy as np 6 | 7 | 8 | def read_json(json_path): 9 | with open(json_path) as f: 10 | data = json.load(f) 11 | kps = [] 12 | for people in data['people']: 13 | kp = np.array(people['pose_keypoints']).reshape(-1, 3) 14 | kps.append(kp) 15 | return kps 16 | 17 | 18 | def get_bbox(json_path, vis_thr=0.2): 19 | kps = read_json(json_path) 20 | # Pick the most confident detection. 21 | scores = [np.mean(kp[kp[:, 2] > vis_thr, 2]) for kp in kps] 22 | kp = kps[np.argmax(scores)] 23 | vis = kp[:, 2] > vis_thr 24 | vis_kp = kp[vis, :2] 25 | min_pt = np.min(vis_kp, axis=0) 26 | max_pt = np.max(vis_kp, axis=0) 27 | person_height = np.linalg.norm(max_pt - min_pt) 28 | if person_height == 0: 29 | print('bad!') 30 | import ipdb 31 | ipdb.set_trace() 32 | center = (min_pt + max_pt) / 2. 33 | scale = 150. 
/ person_height 34 | 35 | return scale, center 36 | -------------------------------------------------------------------------------- /src_ortho/util/openpose.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/util/openpose.pyc -------------------------------------------------------------------------------- /src_ortho/util/renderer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Renders mesh using OpenDr for visualization. 3 | """ 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import cv2 11 | 12 | from opendr.camera import ProjectPoints 13 | from opendr.renderer import ColoredRenderer 14 | from opendr.lighting import LambertianPointLight 15 | 16 | colors = { 17 | # colorbline/print/copy safe: 18 | 'light_blue': [0.65098039, 0.74117647, 0.85882353], 19 | 'light_pink': [.9, .7, .7], # This is used to do no-3d 20 | } 21 | 22 | 23 | class SMPLRenderer(object): 24 | def __init__(self, 25 | img_size=224, 26 | flength=500., 27 | face_path="tf_smpl/smpl_faces.npy"): 28 | self.faces = np.load(face_path) 29 | self.w = img_size 30 | self.h = img_size 31 | self.flength = flength 32 | 33 | def __call__(self, 34 | verts, 35 | cam=None, 36 | img=None, 37 | do_alpha=False, 38 | far=None, 39 | near=None, 40 | color_id=0, 41 | img_size=None): 42 | """ 43 | cam is 3D [f, px, py] 44 | """ 45 | if img is not None: 46 | h, w = img.shape[:2] 47 | elif img_size is not None: 48 | h = img_size[0] 49 | w = img_size[1] 50 | else: 51 | h = self.h 52 | w = self.w 53 | 54 | if cam is None: 55 | cam = [self.flength, w / 2., h / 2.] 56 | 57 | use_cam = ProjectPoints( 58 | f=cam[0] * np.ones(2), 59 | rt=np.zeros(3), 60 | t=np.zeros(3), 61 | k=np.zeros(5), 62 | c=cam[1:3]) 63 | 64 | if near is None: 65 | near = np.maximum(np.min(verts[:, 2]) - 25, 0.1) 66 | if far is None: 67 | far = np.maximum(np.max(verts[:, 2]) + 25, 25) 68 | 69 | imtmp = render_model( 70 | verts, 71 | self.faces, 72 | w, 73 | h, 74 | use_cam, 75 | do_alpha=do_alpha, 76 | img=img, 77 | far=far, 78 | near=near, 79 | color_id=color_id) 80 | 81 | return (imtmp * 255).astype('uint8') 82 | 83 | def rotated(self, 84 | verts, 85 | deg, 86 | cam=None, 87 | axis='y', 88 | img=None, 89 | do_alpha=True, 90 | far=None, 91 | near=None, 92 | color_id=0, 93 | img_size=None): 94 | import math 95 | if axis == 'y': 96 | around = cv2.Rodrigues(np.array([0, math.radians(deg), 0]))[0] 97 | elif axis == 'x': 98 | around = cv2.Rodrigues(np.array([math.radians(deg), 0, 0]))[0] 99 | else: 100 | around = cv2.Rodrigues(np.array([0, 0, math.radians(deg)]))[0] 101 | center = verts.mean(axis=0) 102 | new_v = np.dot((verts - center), around) + center 103 | 104 | return self.__call__( 105 | new_v, 106 | cam, 107 | img=img, 108 | do_alpha=do_alpha, 109 | far=far, 110 | near=near, 111 | img_size=img_size, 112 | color_id=color_id) 113 | 114 | 115 | def _create_renderer(w=640, 116 | h=480, 117 | rt=np.zeros(3), 118 | t=np.zeros(3), 119 | f=None, 120 | c=None, 121 | k=None, 122 | near=.5, 123 | far=10.): 124 | 125 | f = np.array([w, w]) / 2. if f is None else f 126 | c = np.array([w, h]) / 2. 
if c is None else c 127 | k = np.zeros(5) if k is None else k 128 | 129 | rn = ColoredRenderer() 130 | 131 | rn.camera = ProjectPoints(rt=rt, t=t, f=f, c=c, k=k) 132 | rn.frustum = {'near': near, 'far': far, 'height': h, 'width': w} 133 | return rn 134 | 135 | 136 | def _rotateY(points, angle): 137 | """Rotate the points by a specified angle.""" 138 | ry = np.array([[np.cos(angle), 0., np.sin(angle)], [0., 1., 0.], 139 | [-np.sin(angle), 0., np.cos(angle)]]) 140 | return np.dot(points, ry) 141 | 142 | 143 | def simple_renderer(rn, 144 | verts, 145 | faces, 146 | yrot=np.radians(120), 147 | color=colors['light_pink']): 148 | # Rendered model color 149 | rn.set(v=verts, f=faces, vc=color, bgcolor=np.ones(3)) 150 | albedo = rn.vc 151 | 152 | # Construct Back Light (on back right corner) 153 | rn.vc = LambertianPointLight( 154 | f=rn.f, 155 | v=rn.v, 156 | num_verts=len(rn.v), 157 | light_pos=_rotateY(np.array([-20000, -10000, -10000]), yrot), 158 | vc=albedo, 159 | light_color=np.array([1, 1, 1])) 160 | 161 | # Construct Left Light 162 | rn.vc += LambertianPointLight( 163 | f=rn.f, 164 | v=rn.v, 165 | num_verts=len(rn.v), 166 | light_pos=_rotateY(np.array([80000, 1000, 30000]), yrot), 167 | vc=albedo, 168 | light_color=np.array([1, 1, 1])) 169 | 170 | # Construct Right Light 171 | rn.vc += LambertianPointLight( 172 | f=rn.f, 173 | v=rn.v, 174 | num_verts=len(rn.v), 175 | light_pos=_rotateY(np.array([-50000, 50000, 100000]), yrot), 176 | vc=albedo, 177 | light_color=np.array([.7, .7, .7])) 178 | 179 | return rn.r 180 | 181 | 182 | def get_alpha(imtmp, bgval=1.): 183 | h, w = imtmp.shape[:2] 184 | alpha = (~np.all(imtmp == bgval, axis=2)).astype(imtmp.dtype) 185 | 186 | b_channel, g_channel, r_channel = cv2.split(imtmp) 187 | 188 | im_RGBA = cv2.merge((b_channel, g_channel, r_channel, alpha.astype( 189 | imtmp.dtype))) 190 | return im_RGBA 191 | 192 | 193 | def append_alpha(imtmp): 194 | alpha = np.ones_like(imtmp[:, :, 0]).astype(imtmp.dtype) 195 | if np.issubdtype(imtmp.dtype, np.uint8): 196 | alpha = alpha * 255 197 | b_channel, g_channel, r_channel = cv2.split(imtmp) 198 | im_RGBA = cv2.merge((b_channel, g_channel, r_channel, alpha)) 199 | return im_RGBA 200 | 201 | 202 | def render_model(verts, 203 | faces, 204 | w, 205 | h, 206 | cam, 207 | near=0.5, 208 | far=25, 209 | img=None, 210 | do_alpha=False, 211 | color_id=None): 212 | rn = _create_renderer( 213 | w=w, h=h, near=near, far=far, rt=cam.rt, t=cam.t, f=cam.f, c=cam.c) 214 | 215 | # Uses img as background, otherwise white background. 216 | if img is not None: 217 | rn.background_image = img / 255. if img.max() > 1 else img 218 | 219 | if color_id is None: 220 | color = colors['light_blue'] 221 | else: 222 | color_list = colors.values() 223 | color = color_list[color_id % len(color_list)] 224 | 225 | imtmp = simple_renderer(rn, verts, faces, color=color) 226 | 227 | # If white bg, make transparent. 228 | if img is None and do_alpha: 229 | imtmp = get_alpha(imtmp) 230 | elif img is not None and do_alpha: 231 | imtmp = append_alpha(imtmp) 232 | 233 | return imtmp 234 | 235 | 236 | # ------------------------------ 237 | 238 | 239 | def get_original(proc_param, verts, cam, joints, img_size): 240 | img_size = proc_param['img_size'] 241 | undo_scale = 1. / np.array(proc_param['scale']) 242 | 243 | cam_s = cam[0] 244 | cam_pos = cam[1:] 245 | principal_pt = np.array([img_size, img_size]) / 2. 246 | flength = 50000. 
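    # Convert the weak-perspective camera [s, tx, ty] predicted in the crop into a
    # perspective camera for rendering: with a large focal length, placing the mesh
    # at depth tz = flength / (0.5 * img_size * cam_s) makes the perspective
    # magnification flength / tz equal the orthographic scale in crop pixels,
    # so the rendered mesh lines up with the cropped image.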
247 | tz = flength / (0.5 * img_size * cam_s) 248 | trans = np.hstack([cam_pos, tz]) 249 | vert_shifted = verts + trans 250 | 251 | start_pt = proc_param['start_pt'] - 0.5 * img_size 252 | final_principal_pt = (principal_pt + start_pt) * undo_scale 253 | cam_for_render = np.hstack( 254 | [np.mean(flength * undo_scale), final_principal_pt]) 255 | 256 | # This is in padded image. 257 | # kp_original = (joints + proc_param['start_pt']) * undo_scale 258 | # Subtract padding from joints. 259 | margin = int(img_size / 2) 260 | kp_original = (joints + proc_param['start_pt'] - margin) * undo_scale 261 | 262 | return cam_for_render, vert_shifted, kp_original 263 | 264 | 265 | def draw_skeleton(input_image, joints, draw_edges=True, vis=None, radius=None): 266 | """ 267 | joints is 3 x 19. but if not will transpose it. 268 | 0: Right ankle 269 | 1: Right knee 270 | 2: Right hip 271 | 3: Left hip 272 | 4: Left knee 273 | 5: Left ankle 274 | 6: Right wrist 275 | 7: Right elbow 276 | 8: Right shoulder 277 | 9: Left shoulder 278 | 10: Left elbow 279 | 11: Left wrist 280 | 12: Neck 281 | 13: Head top 282 | 14: nose 283 | 15: left_eye 284 | 16: right_eye 285 | 17: left_ear 286 | 18: right_ear 287 | """ 288 | import numpy as np 289 | import cv2 290 | 291 | if radius is None: 292 | radius = max(4, (np.mean(input_image.shape[:2]) * 0.01).astype(int)) 293 | 294 | colors = { 295 | 'pink': np.array([197, 27, 125]), # L lower leg 296 | 'light_pink': np.array([233, 163, 201]), # L upper leg 297 | 'light_green': np.array([161, 215, 106]), # L lower arm 298 | 'green': np.array([77, 146, 33]), # L upper arm 299 | 'red': np.array([215, 48, 39]), # head 300 | 'light_red': np.array([252, 146, 114]), # head 301 | 'light_orange': np.array([252, 141, 89]), # chest 302 | 'purple': np.array([118, 42, 131]), # R lower leg 303 | 'light_purple': np.array([175, 141, 195]), # R upper 304 | 'light_blue': np.array([145, 191, 219]), # R lower arm 305 | 'blue': np.array([69, 117, 180]), # R upper arm 306 | 'gray': np.array([130, 130, 130]), # 307 | 'white': np.array([255, 255, 255]), # 308 | } 309 | 310 | image = input_image.copy() 311 | input_is_float = False 312 | 313 | if np.issubdtype(image.dtype, np.float): 314 | input_is_float = True 315 | max_val = image.max() 316 | if max_val <= 2.: # should be 1 but sometimes it's slightly above 1 317 | image = (image * 255).astype(np.uint8) 318 | else: 319 | image = (image).astype(np.uint8) 320 | 321 | if joints.shape[0] != 2: 322 | joints = joints.T 323 | joints = np.round(joints).astype(int) 324 | 325 | jcolors = [ 326 | 'light_pink', 'light_pink', 'light_pink', 'pink', 'pink', 'pink', 327 | 'light_blue', 'light_blue', 'light_blue', 'blue', 'blue', 'blue', 328 | 'purple', 'purple', 'red', 'green', 'green', 'white', 'white' 329 | ] 330 | 331 | if joints.shape[1] == 19: 332 | # parent indices -1 means no parents 333 | parents = np.array([ 334 | 1, 2, 8, 9, 3, 4, 7, 8, 12, 12, 9, 10, 14, -1, 13, -1, -1, 15, 16 335 | ]) 336 | # Left is light and right is dark 337 | ecolors = { 338 | 0: 'light_pink', 339 | 1: 'light_pink', 340 | 2: 'light_pink', 341 | 3: 'pink', 342 | 4: 'pink', 343 | 5: 'pink', 344 | 6: 'light_blue', 345 | 7: 'light_blue', 346 | 8: 'light_blue', 347 | 9: 'blue', 348 | 10: 'blue', 349 | 11: 'blue', 350 | 12: 'purple', 351 | 17: 'light_green', 352 | 18: 'light_green', 353 | 14: 'purple' 354 | } 355 | elif joints.shape[1] == 14: 356 | parents = np.array([ 357 | 1, 358 | 2, 359 | 8, 360 | 9, 361 | 3, 362 | 4, 363 | 7, 364 | 8, 365 | -1, 366 | -1, 367 | 9, 368 | 10, 369 | 13, 370 
| -1, 371 | ]) 372 | ecolors = { 373 | 0: 'light_pink', 374 | 1: 'light_pink', 375 | 2: 'light_pink', 376 | 3: 'pink', 377 | 4: 'pink', 378 | 5: 'pink', 379 | 6: 'light_blue', 380 | 7: 'light_blue', 381 | 10: 'light_blue', 382 | 11: 'blue', 383 | 12: 'purple' 384 | } 385 | else: 386 | print('Unknown skeleton!!') 387 | import ipdb 388 | ipdb.set_trace() 389 | 390 | for child in xrange(len(parents)): 391 | point = joints[:, child] 392 | # If invisible skip 393 | if vis is not None and vis[child] == 0: 394 | continue 395 | if draw_edges: 396 | cv2.circle(image, (point[0], point[1]), radius, colors['white'], 397 | -1) 398 | cv2.circle(image, (point[0], point[1]), radius - 1, 399 | colors[jcolors[child]], -1) 400 | else: 401 | # cv2.circle(image, (point[0], point[1]), 5, colors['white'], 1) 402 | cv2.circle(image, (point[0], point[1]), radius - 1, 403 | colors[jcolors[child]], 1) 404 | # cv2.circle(image, (point[0], point[1]), 5, colors['gray'], -1) 405 | pa_id = parents[child] 406 | if draw_edges and pa_id >= 0: 407 | if vis is not None and vis[pa_id] == 0: 408 | continue 409 | point_pa = joints[:, pa_id] 410 | cv2.circle(image, (point_pa[0], point_pa[1]), radius - 1, 411 | colors[jcolors[pa_id]], -1) 412 | if child not in ecolors.keys(): 413 | print('bad') 414 | import ipdb 415 | ipdb.set_trace() 416 | cv2.line(image, (point[0], point[1]), (point_pa[0], point_pa[1]), 417 | colors[ecolors[child]], radius - 2) 418 | 419 | # Convert back in original dtype 420 | if input_is_float: 421 | if max_val <= 1.: 422 | image = image.astype(np.float32) / 255. 423 | else: 424 | image = image.astype(np.float32) 425 | 426 | return image 427 | 428 | 429 | def draw_text(input_image, content): 430 | """ 431 | content is a dict. draws key: val on image 432 | Assumes key is str, val is float 433 | """ 434 | import numpy as np 435 | import cv2 436 | image = input_image.copy() 437 | input_is_float = False 438 | if np.issubdtype(image.dtype, np.float): 439 | input_is_float = True 440 | image = (image * 255).astype(np.uint8) 441 | 442 | black = np.array([0, 0, 0]) 443 | margin = 15 444 | start_x = 5 445 | start_y = margin 446 | for key in sorted(content.keys()): 447 | text = "%s: %.2g" % (key, content[key]) 448 | cv2.putText(image, text, (start_x, start_y), 0, 0.45, black) 449 | start_y += margin 450 | 451 | if input_is_float: 452 | image = image.astype(np.float32) / 255. 453 | return image 454 | -------------------------------------------------------------------------------- /src_ortho/util/renderer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamljb/HumanMultiView/b9f42220694973926e35cb8beb993b8479fb8f98/src_ortho/util/renderer.pyc --------------------------------------------------------------------------------