├── utils
│   ├── __init__.py
│   └── utils.py
├── models
│   ├── __init__.py
│   ├── nets
│   │   ├── __init__.py
│   │   └── vnect_model_bn_folded.py
│   └── vnect_model.py
├── test_imgs
│   └── yuniko.jpg
├── run_demo_tf.sh
├── run_demo_tf_gl.sh
├── README.md
├── pyqt_test.py
├── caffe_weights_to_pickle.py
├── plotly_test.py
├── utils.py
├── demo.py
├── demo_gl.py
├── vispy_test.py
├── demo_tf_gl.py
├── demo_tf.py
├── LICENSE
└── demo_multithread.py

/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/models/nets/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/test_imgs/yuniko.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timctho/VNect-tensorflow/HEAD/test_imgs/yuniko.jpg
--------------------------------------------------------------------------------
/run_demo_tf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python demo_tf.py --device=gpu \
4 |                   --demo_type=image \
5 |                   --test_img=test_imgs/yuniko.jpg \
6 |                   --model_file=models/weights/vnect_tf \
7 |                   --plot_2d=True \
8 |                   --plot_3d=True
9 | 
--------------------------------------------------------------------------------
/run_demo_tf_gl.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python demo_tf_gl.py --device=gpu \
4 |                      --demo_type=webcam \
5 |                      --test_img=test_imgs/yuniko.jpg \
6 |                      --model_file=models/weights/vnect_tf \
7 |                      --plot_2d=True \
8 |                      --plot_3d=True
9 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # VNect -- TensorFlow version
2 | This project is a TensorFlow implementation of [VNect: Real-time 3D Human Pose Estimation with a Single RGB Camera](http://gvv.mpi-inf.mpg.de/projects/VNect/), SIGGRAPH 2017.
3 | 
4 | This is not an official implementation. Please contact the paper's authors to obtain the trained model.
5 | 
6 | ## Environments
7 | - Ubuntu 16.04
8 | - Python 2.7
9 | - TensorFlow 1.3.0
10 | - OpenCV 3.3.0
11 | - OpenGL (optional)
12 | 
13 | ## Inference
14 | 1. Download the model and put the weight files in the folder `models/weights`.
15 | 2. Edit the demo settings in the shell script: `--device`, `--demo_type`, `--model_file`, `--test_img`, `--plot_2d`, `--plot_3d`.
16 | 3. If you have OpenGL, run `run_demo_tf_gl.sh` for faster rendering of the 3D joints; otherwise, run `run_demo_tf.sh`.
17 | 
18 | ## TODO
19 | - Fix remaining bugs in the detected 3D joint locations.
20 | - Add the training part of the model.
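
## Convert Caffe weights (optional)
`caffe_weights_to_pickle.py` dumps the original Caffe weights into a pickle file. A minimal usage sketch, assuming you have obtained `vnect_net.prototxt` and `vnect_model.caffemodel` from the paper's authors (the flags below are the script's own argparse options):

```bash
python caffe_weights_to_pickle.py --prototxt=models/vnect_net.prototxt \
                                  --caffemodel=models/vnect_model.caffemodel \
                                  --output_file=vnect.pkl
```

The script groups each Caffe `bn`/`scale` layer pair into a single batch-norm entry and transposes convolution kernels to TensorFlow's HWIO layout before pickling.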
21 | 22 | 23 | -------------------------------------------------------------------------------- /pyqt_test.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | from pygame.locals import * 3 | 4 | from OpenGL.GL import * 5 | from OpenGL.GLU import * 6 | import time 7 | 8 | verticies = ( 9 | (1, -1, -1), 10 | (1, 1, -1), 11 | (-1, 1, -1), 12 | (-1, -1, -1), 13 | (1, -1, 1), 14 | (1, 1, 1), 15 | (-1, -1, 1), 16 | (-1, 1, 1) 17 | ) 18 | 19 | edges = ( 20 | (0,2), 21 | (0,3), 22 | (0,4), 23 | (2,1), 24 | (2,3), 25 | (2,7), 26 | (6,3), 27 | (6,4), 28 | (6,7), 29 | (5,1), 30 | (5,4), 31 | (5,7) 32 | ) 33 | 34 | 35 | def Cube(): 36 | glBegin(GL_LINES) 37 | for edge in edges: 38 | for vertex in edge: 39 | glVertex3fv(verticies[vertex]) 40 | glEnd() 41 | 42 | 43 | def main(): 44 | pygame.init() 45 | display = (800,600) 46 | pygame.display.set_mode(display, DOUBLEBUF|OPENGL) 47 | 48 | gluPerspective(45, (display[0]/display[1]), 0.1, 500.0) 49 | 50 | glTranslatef(0.0,0.0, -5) 51 | 52 | while True: 53 | t1 = time.time() 54 | for event in pygame.event.get(): 55 | if event.type == pygame.QUIT: 56 | pygame.quit() 57 | quit() 58 | 59 | glRotatef(1, 3, 1, 1) 60 | glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT) 61 | Cube() 62 | pygame.display.flip() 63 | print('FPS', 1/(time.time()-t1)) 64 | 65 | pygame.time.wait(1) 66 | 67 | main() -------------------------------------------------------------------------------- /caffe_weights_to_pickle.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import numpy as np 3 | import pickle 4 | import argparse 5 | from collections import OrderedDict 6 | 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--prototxt', 10 | default='models/vnect_net.prototxt') 11 | parser.add_argument('--caffemodel', 12 | default='models/vnect_model.caffemodel') 13 | parser.add_argument('--output_file', 14 | default='vnect.pkl') 15 | args = parser.parse_args() 16 | 17 | if __name__ == '__main__': 18 | 19 | pkl_weights = OrderedDict() 20 | 21 | net = caffe.Net(args.prototxt, 22 | caffe.TEST, 23 | weights=args.caffemodel) 24 | 25 | for layer in net.params.keys(): 26 | print(layer) 27 | 28 | print('======') 29 | cur_bn_name = '' 30 | for layer in net.params.keys(): 31 | print(layer, len(net.params[layer])) 32 | 33 | for i in range(len(net.params[layer])): 34 | print(net.params[layer][i].data.shape) 35 | 36 | if layer.startswith('bn'): 37 | cur_bn_name = layer 38 | pkl_weights[layer+'/moving_mean'] = np.asarray(net.params[layer][0].data) / net.params[layer][2].data 39 | pkl_weights[layer+'/moving_variance'] = np.asarray(net.params[layer][1].data) / net.params[layer][2].data 40 | elif layer.startswith('scale'): 41 | pkl_weights[cur_bn_name+'/gamma'] = np.asarray(net.params[layer][0].data) 42 | pkl_weights[cur_bn_name+'/beta'] = np.asarray(net.params[layer][1].data) 43 | elif len(net.params[layer]) == 2: 44 | pkl_weights[layer+'/weights'] = np.asarray(net.params[layer][0].data).transpose((2,3,1,0)) 45 | pkl_weights[layer+'/biases'] = np.asarray(net.params[layer][1].data) 46 | elif len(net.params[layer]) == 1: 47 | pkl_weights[layer+'/kernel'] = np.asarray(net.params[layer][0].data).transpose((2,3,1,0)) 48 | 49 | for layer in pkl_weights.keys(): 50 | print(layer, pkl_weights[layer].shape) 51 | 52 | with open(args.output_file, 'wb') as f: 53 | pickle.dump(pkl_weights, f) 54 | -------------------------------------------------------------------------------- /plotly_test.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | from pyqtgraph.Qt import QtCore, QtGui 4 | import numpy as np 5 | import pyqtgraph as pg 6 | 7 | 8 | class App(QtGui.QMainWindow): 9 | def __init__(self, parent=None): 10 | super(App, self).__init__(parent) 11 | 12 | #### Create Gui Elements ########### 13 | self.mainbox = QtGui.QWidget() 14 | self.setCentralWidget(self.mainbox) 15 | self.mainbox.setLayout(QtGui.QVBoxLayout()) 16 | 17 | self.canvas = pg.GraphicsLayoutWidget() 18 | self.mainbox.layout().addWidget(self.canvas) 19 | 20 | self.label = QtGui.QLabel() 21 | self.mainbox.layout().addWidget(self.label) 22 | 23 | self.view = self.canvas.addViewBox() 24 | self.view.setAspectLocked(True) 25 | self.view.setRange(QtCore.QRectF(0,0, 100, 100)) 26 | 27 | # image plot 28 | self.img = pg.ImageItem(border='w') 29 | self.view.addItem(self.img) 30 | 31 | self.canvas.nextRow() 32 | # line plot 33 | self.otherplot = self.canvas.addPlot() 34 | self.h2 = self.otherplot.plot(pen='y') 35 | 36 | 37 | #### Set Data ##################### 38 | 39 | self.x = np.linspace(0,50., num=100) 40 | self.X,self.Y = np.meshgrid(self.x,self.x) 41 | 42 | self.counter = 0 43 | self.fps = 0. 44 | self.lastupdate = time.time() 45 | 46 | #### Start ##################### 47 | self._update() 48 | 49 | def _update(self): 50 | 51 | self.data = np.sin(self.X/3.+self.counter/9.)*np.cos(self.Y/3.+self.counter/9.) 52 | self.ydata = np.sin(self.x/3.+ self.counter/9.) 53 | 54 | self.img.setImage(self.data) 55 | self.h2.setData(self.ydata) 56 | 57 | now = time.time() 58 | dt = (now-self.lastupdate) 59 | if dt <= 0: 60 | dt = 0.000000000001 61 | fps2 = 1.0 / dt 62 | self.lastupdate = now 63 | self.fps = self.fps * 0.9 + fps2 * 0.1 64 | tx = 'Mean Frame Rate: {fps:.3f} FPS'.format(fps=self.fps ) 65 | self.label.setText(tx) 66 | QtCore.QTimer.singleShot(1, self._update) 67 | self.counter += 1 68 | 69 | 70 | if __name__ == '__main__': 71 | 72 | app = QtGui.QApplication(sys.argv) 73 | thisapp = App() 74 | thisapp.show() 75 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import math 4 | from OpenGL.GL import * 5 | 6 | 7 | 8 | def read_square_image(file, cam, boxsize, type): 9 | # from file 10 | if type == 'IMAGE': 11 | oriImg = cv2.imread(file) 12 | # from webcam 13 | elif type == 'WEBCAM': 14 | _, oriImg = cam.read() 15 | 16 | scale = boxsize / (oriImg.shape[0] * 1.0) 17 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4) 18 | 19 | output_img = np.ones((boxsize, boxsize, 3)) * 128 20 | 21 | if imageToTest.shape[1] < boxsize: 22 | offset = imageToTest.shape[1] % 2 23 | output_img[:, int(boxsize/2-math.ceil(imageToTest.shape[1]/2)):int(boxsize/2+math.ceil(imageToTest.shape[1]/2)+offset), :] = imageToTest 24 | else: 25 | output_img = imageToTest[:, int(imageToTest.shape[1]/2-boxsize/2):int(imageToTest.shape[1]/2+boxsize/2), :] 26 | return output_img 27 | 28 | def resize_pad_img(img, scale, output_size): 29 | resized_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR) 30 | pad_h = (output_size - resized_img.shape[0]) // 2 31 | pad_w = (output_size - resized_img.shape[1]) // 2 32 | pad_h_offset = (output_size - resized_img.shape[0]) % 2 33 | pad_w_offset = (output_size - resized_img.shape[1]) % 2 34 | 
resized_pad_img = np.pad(resized_img, ((pad_w, pad_w+pad_w_offset), (pad_h, pad_h+pad_h_offset), (0, 0)), 35 | mode='constant', constant_values=128) 36 | 37 | return resized_pad_img 38 | 39 | 40 | def draw_predicted_heatmap(heatmap, input_size): 41 | heatmap_resized = cv2.resize(heatmap, (input_size, input_size)) 42 | 43 | output_img = None 44 | tmp_concat_img = None 45 | h_count = 0 46 | for joint_num in range(heatmap_resized.shape[2]): 47 | if h_count < 4: 48 | tmp_concat_img = np.concatenate((tmp_concat_img, heatmap_resized[:, :, joint_num]), axis=1) \ 49 | if tmp_concat_img is not None else heatmap_resized[:, :, joint_num] 50 | h_count += 1 51 | else: 52 | output_img = np.concatenate((output_img, tmp_concat_img), axis=0) if output_img is not None else tmp_concat_img 53 | tmp_concat_img = None 54 | h_count = 0 55 | # last row img 56 | if h_count != 0: 57 | while h_count < 4: 58 | tmp_concat_img = np.concatenate((tmp_concat_img, np.zeros(shape=(input_size, input_size), dtype=np.float32)), axis=1) 59 | h_count += 1 60 | output_img = np.concatenate((output_img, tmp_concat_img), axis=0) 61 | 62 | # adjust heatmap color 63 | output_img = output_img.astype(np.uint8) 64 | output_img = cv2.applyColorMap(output_img, cv2.COLORMAP_JET) 65 | return output_img 66 | 67 | def extract_2d_joint_from_heatmap(heatmap, input_size, joints_2d): 68 | heatmap_resized = cv2.resize(heatmap, (input_size, input_size)) 69 | 70 | for joint_num in range(heatmap_resized.shape[2]): 71 | joint_coord = np.unravel_index(np.argmax(heatmap_resized[:, :, joint_num]), (input_size, input_size)) 72 | joints_2d[joint_num, :] = joint_coord 73 | 74 | return 75 | 76 | 77 | def extract_3d_joints_from_heatmap(joints_2d, x_hm, y_hm, z_hm, input_size, joints_3d): 78 | 79 | for joint_num in range(x_hm.shape[2]): 80 | coord_2d_x = joints_2d[joint_num][0] 81 | coord_2d_y = joints_2d[joint_num][1] 82 | 83 | joint_x = x_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10 84 | joint_y = y_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10 85 | joint_z = z_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10 86 | joints_3d[joint_num, 0] = joint_x 87 | joints_3d[joint_num, 1] = joint_y 88 | joints_3d[joint_num, 2] = joint_z 89 | joints_3d -= joints_3d[14, :] 90 | 91 | return 92 | 93 | def draw_limbs_2d(img, joints_2d, limb_parents): 94 | for limb_num in range(len(limb_parents)-1): 95 | x1 = joints_2d[limb_num, 0] 96 | y1 = joints_2d[limb_num, 1] 97 | x2 = joints_2d[limb_parents[limb_num], 0] 98 | y2 = joints_2d[limb_parents[limb_num], 1] 99 | length = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 100 | # if length < 10000 and length > 5: 101 | deg = math.degrees(math.atan2(x1 - x2, y1 - y2)) 102 | polygon = cv2.ellipse2Poly((int((y1 + y2) / 2), int((x1 + x2) / 2)), 103 | (int(length / 2), 3), 104 | int(deg), 105 | 0, 360, 1) 106 | cv2.fillConvexPoly(img, polygon, color=(0,255,0)) 107 | return 108 | 109 | def draw_limbs_3d(joints_3d, limb_parents, ax): 110 | 111 | for i in range(joints_3d.shape[0]): 112 | x_pair = [joints_3d[i, 0], joints_3d[limb_parents[i], 0]] 113 | y_pair = [joints_3d[i, 1], joints_3d[limb_parents[i], 1]] 114 | z_pair = [joints_3d[i, 2], joints_3d[limb_parents[i], 2]] 115 | ax.plot(x_pair, y_pair, zs=z_pair, linewidth=3) 116 | 117 | 118 | def draw_limbs_3d_gl(joints_3d, limb_parents): 119 | 120 | glLineWidth(2) 121 | glBegin(GL_LINES) 122 | glColor3f(1,0,0) 123 | glVertex3fv((0,0,0)) 124 | glVertex3fv((100,0,0)) 125 | glColor3f(0,1,0) 126 | glVertex3fv((0,0,0)) 127 | 
glVertex3fv((0,100,0)) 128 | glColor3f(0,0,1) 129 | glVertex3fv((0,0,0)) 130 | glVertex3fv((0,0,100)) 131 | glEnd() 132 | 133 | glColor3f(1,1,1) 134 | glBegin(GL_LINES) 135 | for i in range(joints_3d.shape[0]): 136 | glVertex3fv((joints_3d[i, 0], joints_3d[i, 1], joints_3d[i, 2])) 137 | glVertex3fv((joints_3d[limb_parents[i], 0], joints_3d[limb_parents[i], 1], joints_3d[limb_parents[i], 2])) 138 | glEnd() 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import math 4 | import matplotlib.pyplot as plt 5 | from mpl_toolkits.mplot3d import Axes3D 6 | from OpenGL.GL import * 7 | from OpenGL.GLU import * 8 | 9 | 10 | 11 | def read_square_image(file, cam, boxsize, type): 12 | # from file 13 | if type == 'IMAGE': 14 | oriImg = cv2.imread(file) 15 | # from webcam 16 | elif type == 'WEBCAM': 17 | _, oriImg = cam.read() 18 | 19 | scale = boxsize / (oriImg.shape[0] * 1.0) 20 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4) 21 | 22 | output_img = np.ones((boxsize, boxsize, 3)) * 128 23 | 24 | if imageToTest.shape[1] < boxsize: 25 | offset = imageToTest.shape[1] % 2 26 | output_img[:, int(boxsize/2-math.ceil(imageToTest.shape[1]/2)):int(boxsize/2+math.ceil(imageToTest.shape[1]/2)+offset), :] = imageToTest 27 | else: 28 | output_img = imageToTest[:, int(imageToTest.shape[1]/2-boxsize/2):int(imageToTest.shape[1]/2+boxsize/2), :] 29 | return output_img 30 | 31 | def resize_pad_img(img, scale, output_size): 32 | resized_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR) 33 | pad_h = (output_size - resized_img.shape[0]) // 2 34 | pad_w = (output_size - resized_img.shape[1]) // 2 35 | pad_h_offset = (output_size - resized_img.shape[0]) % 2 36 | pad_w_offset = (output_size - resized_img.shape[1]) % 2 37 | resized_pad_img = np.pad(resized_img, ((pad_w, pad_w+pad_w_offset), (pad_h, pad_h+pad_h_offset), (0, 0)), 38 | mode='constant', constant_values=128) 39 | 40 | return resized_pad_img 41 | 42 | 43 | def draw_predicted_heatmap(heatmap, input_size): 44 | heatmap_resized = cv2.resize(heatmap, (input_size, input_size)) 45 | 46 | output_img = None 47 | tmp_concat_img = None 48 | h_count = 0 49 | for joint_num in range(heatmap_resized.shape[2]): 50 | if h_count < 4: 51 | tmp_concat_img = np.concatenate((tmp_concat_img, heatmap_resized[:, :, joint_num]), axis=1) \ 52 | if tmp_concat_img is not None else heatmap_resized[:, :, joint_num] 53 | h_count += 1 54 | else: 55 | output_img = np.concatenate((output_img, tmp_concat_img), axis=0) if output_img is not None else tmp_concat_img 56 | tmp_concat_img = None 57 | h_count = 0 58 | # last row img 59 | if h_count != 0: 60 | while h_count < 4: 61 | tmp_concat_img = np.concatenate((tmp_concat_img, np.zeros(shape=(input_size, input_size), dtype=np.float32)), axis=1) 62 | h_count += 1 63 | output_img = np.concatenate((output_img, tmp_concat_img), axis=0) 64 | 65 | # adjust heatmap color 66 | output_img = output_img.astype(np.uint8) 67 | output_img = cv2.applyColorMap(output_img, cv2.COLORMAP_JET) 68 | return output_img 69 | 70 | def extract_2d_joint_from_heatmap(heatmap, input_size, joints_2d): 71 | heatmap_resized = cv2.resize(heatmap, (input_size, input_size)) 72 | 73 | for joint_num in range(heatmap_resized.shape[2]): 74 | joint_coord = 
np.unravel_index(np.argmax(heatmap_resized[:, :, joint_num]), (input_size, input_size)) 75 | joints_2d[joint_num, :] = joint_coord 76 | 77 | return joints_2d 78 | 79 | 80 | def extract_3d_joints_from_heatmap(joints_2d, x_hm, y_hm, z_hm, input_size, joints_3d): 81 | 82 | for joint_num in range(x_hm.shape[2]): 83 | coord_2d_x = joints_2d[joint_num][0] 84 | coord_2d_y = joints_2d[joint_num][1] 85 | 86 | # x_hm_resized = cv2.resize(x_hm, (input_size, input_size)) 87 | # y_hm_resized = cv2.resize(y_hm, (input_size, input_size)) 88 | # z_hm_resized = cv2.resize(z_hm, (input_size, input_size)) 89 | # joint_x = x_hm_resized[max(int(coord_2d_x), 1), max(int(coord_2d_y), 1), joint_num] * 100 90 | # joint_y = y_hm_resized[max(int(coord_2d_x), 1), max(int(coord_2d_y), 1), joint_num] * 100 91 | # joint_z = z_hm_resized[max(int(coord_2d_x), 1), max(int(coord_2d_y), 1), joint_num] * 100 92 | 93 | 94 | joint_x = x_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10 95 | joint_y = y_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10 96 | joint_z = z_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10 97 | joints_3d[joint_num, 0] = joint_x 98 | joints_3d[joint_num, 1] = joint_y 99 | joints_3d[joint_num, 2] = joint_z 100 | joints_3d -= joints_3d[14, :] 101 | 102 | return joints_3d 103 | 104 | def draw_limbs_2d(img, joints_2d, limb_parents): 105 | for limb_num in range(len(limb_parents)-1): 106 | x1 = joints_2d[limb_num, 0] 107 | y1 = joints_2d[limb_num, 1] 108 | x2 = joints_2d[limb_parents[limb_num], 0] 109 | y2 = joints_2d[limb_parents[limb_num], 1] 110 | length = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 111 | # if length < 10000 and length > 5: 112 | deg = math.degrees(math.atan2(x1 - x2, y1 - y2)) 113 | polygon = cv2.ellipse2Poly((int((y1 + y2) / 2), int((x1 + x2) / 2)), 114 | (int(length / 2), 3), 115 | int(deg), 116 | 0, 360, 1) 117 | cv2.fillConvexPoly(img, polygon, color=(0,255,0)) 118 | return img 119 | 120 | def draw_limbs_3d(joints_3d, limb_parents, ax): 121 | 122 | for i in range(joints_3d.shape[0]): 123 | x_pair = [joints_3d[i, 0], joints_3d[limb_parents[i], 0]] 124 | y_pair = [joints_3d[i, 1], joints_3d[limb_parents[i], 1]] 125 | z_pair = [joints_3d[i, 2], joints_3d[limb_parents[i], 2]] 126 | ax.plot(x_pair, y_pair, zs=z_pair, linewidth=3) 127 | 128 | 129 | def draw_limb_3d_gl(joints_3d, limb_parents): 130 | glBegin(GL_LINES) 131 | for i in range(joints_3d.shape[0]): 132 | print(joints_3d[i, :]) 133 | glVertex3fv((joints_3d[i, 0], joints_3d[i, 1], joints_3d[i, 2])) 134 | glVertex3fv((joints_3d[limb_parents[i], 0], joints_3d[limb_parents[i], 1], joints_3d[limb_parents[i], 2])) 135 | glEnd() 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | import time 7 | import matplotlib.pyplot as plt 8 | from mpl_toolkits.mplot3d import Axes3D 9 | 10 | import utils 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--device', default='gpu') 14 | parser.add_argument('--model_dir', default='') 15 | parser.add_argument('--input_size', default=368) 16 | parser.add_argument('--num_of_joints', default=21) 17 | parser.add_argument('--pool_scale', default=8) 18 | parser.add_argument('--plot_2d', default=True) 19 | parser.add_argument('--plot_3d', 
default=True) 20 | args = parser.parse_args() 21 | 22 | joint_color_code = [[139, 53, 255], 23 | [0, 56, 255], 24 | [43, 140, 237], 25 | [37, 168, 36], 26 | [147, 147, 0], 27 | [70, 17, 145]] 28 | 29 | # Limb parents of each joint 30 | limb_parents = [1, 15, 1, 2, 3, 1, 5, 6, 14, 8, 9, 14, 11, 12, 14, 14, 1, 4, 7, 10, 13] 31 | 32 | # input scales 33 | scales = [1.0, 0.7] 34 | 35 | 36 | def demo(): 37 | joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32) 38 | joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32) 39 | 40 | if args.plot_3d: 41 | plt.ion() 42 | fig = plt.figure() 43 | ax = fig.add_subplot(121, projection='3d') 44 | ax2 = fig.add_subplot(122) 45 | plt.show() 46 | 47 | if args.device == 'cpu': 48 | caffe.set_mode_cpu() 49 | elif args.device == 'gpu': 50 | caffe.set_mode_gpu() 51 | caffe.set_device(0) 52 | else: 53 | raise ValueError('No such device') 54 | 55 | model_prototxt_path = os.path.join(args.model_dir, 'vnect_net.prototxt') 56 | model_weight_path = os.path.join(args.model_dir, 'vnect_model.caffemodel') 57 | 58 | # Load model 59 | model = caffe.Net(model_prototxt_path, 60 | model_weight_path, 61 | caffe.TEST) 62 | 63 | # Show network structure and shape 64 | for layer_name in model.params.keys(): 65 | print(layer_name, model.params[layer_name][0].data.shape) 66 | print('') 67 | 68 | for i in model.blobs.keys(): 69 | print(i, model.blobs[i].data.shape) 70 | 71 | cam = cv2.VideoCapture(0) 72 | is_tracking = False 73 | # for img_name in os.listdir('test_imgs'): 74 | while True: 75 | # if not is_tracking: 76 | 77 | img_path = 'test_imgs/{}'.format('dance.jpg') 78 | t1 = time.time() 79 | input_batch = [] 80 | 81 | cam_img = utils.read_square_image('', cam, args.input_size, 'WEBCAM') 82 | # cam_img = utils.read_square_image(img_path, '', args.input_size, 'IMAGE') 83 | # cv2.imshow('', cam_img) 84 | # cv2.waitKey(0) 85 | orig_size_input = cam_img.astype(np.float32) 86 | 87 | for scale in scales: 88 | resized_img = utils.resize_pad_img(orig_size_input, scale, args.input_size) 89 | input_batch.append(resized_img) 90 | 91 | input_batch = np.asarray(input_batch, dtype=np.float32) 92 | input_batch = np.transpose(input_batch, (0, 3, 1, 2)) 93 | input_batch /= 255.0 94 | input_batch -= 0.4 95 | 96 | model.blobs['data'].data[...] 
= input_batch 97 | 98 | # Forward 99 | model.forward() 100 | 101 | # Get output data 102 | x_hm = model.blobs['x_heatmap'].data 103 | y_hm = model.blobs['y_heatmap'].data 104 | z_hm = model.blobs['z_heatmap'].data 105 | hm = model.blobs['heatmap'].data 106 | 107 | # Trans coordinates 108 | x_hm = x_hm.transpose([0, 2, 3, 1]) 109 | y_hm = y_hm.transpose([0, 2, 3, 1]) 110 | z_hm = z_hm.transpose([0, 2, 3, 1]) 111 | hm = hm.transpose([0, 2, 3, 1]) 112 | 113 | # Average scale outputs 114 | hm_size = args.input_size // args.pool_scale 115 | hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 116 | x_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 117 | y_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 118 | z_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 119 | for i in range(len(scales)): 120 | rescale = 1.0 / scales[i] 121 | scaled_hm = cv2.resize(hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 122 | scaled_x_hm = cv2.resize(x_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 123 | scaled_y_hm = cv2.resize(y_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 124 | scaled_z_hm = cv2.resize(z_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 125 | mid = [scaled_hm.shape[0] // 2, scaled_hm.shape[1] // 2] 126 | hm_avg += scaled_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 127 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 128 | x_hm_avg += scaled_x_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 129 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 130 | y_hm_avg += scaled_y_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 131 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 132 | z_hm_avg += scaled_z_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 133 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 134 | hm_avg /= len(scales) 135 | x_hm_avg /= len(scales) 136 | y_hm_avg /= len(scales) 137 | z_hm_avg /= len(scales) 138 | 139 | t2 = time.time() 140 | # Get 2d joints 141 | joints_2d = utils.extract_2d_joint_from_heatmap(hm_avg, args.input_size, joints_2d) 142 | 143 | # Get 3d joints 144 | joints_3d = utils.extract_3d_joints_from_heatmap(joints_2d, x_hm_avg, y_hm_avg, z_hm_avg, args.input_size, 145 | joints_3d) 146 | print('Post FPS', 1/(time.time()-t2)) 147 | 148 | # Plot 2d location heatmap 149 | joint_map = np.zeros(shape=(args.input_size, args.input_size, 3)) 150 | for joint_num in range(joints_2d.shape[0]): 151 | cv2.circle(joint_map, center=(joints_2d[joint_num][1], joints_2d[joint_num][0]), radius=3, 152 | color=(255, 0, 0), thickness=-1) 153 | 154 | # Plot 2d limbs 155 | limb_img = utils.draw_limbs_2d(cam_img, joints_2d, limb_parents) 156 | 157 | # Plot 3d limbs 158 | if args.plot_3d: 159 | ax.clear() 160 | ax.view_init(azim=0, elev=90) 161 | ax.set_xlim(-700, 700) 162 | ax.set_ylim(-800, 800) 163 | ax.set_zlim(-700, 700) 164 | ax.set_xlabel('x') 165 | ax.set_ylabel('y') 166 | ax.set_zlabel('z') 167 | utils.draw_limbs_3d(joints_3d, limb_parents, ax) 168 | 169 | # draw heatmap 170 | # hm_img = utils.draw_predicted_heatmap(hm_avg*200, args.input_size) 171 | # cv2.imshow('hm', hm_img.astype(np.uint8)) 172 | # cv2.waitKey(0) 173 | 174 | 175 | concat_img = np.concatenate((limb_img, joint_map), axis=1) 176 | 177 | # ax2.imshow(concat_img[..., ::-1].astype(np.uint8)) 178 | cv2.imshow('2d', concat_img.astype(np.uint8)) 179 | cv2.waitKey(1) 180 | # ax2.imshow(concat_img.astype(np.uint8)) 181 | # 
plt.pause(0.0001) 182 | # plt.show(block=False) 183 | print('Forward FPS', 1 / (time.time() - t1)) 184 | 185 | 186 | if __name__ == '__main__': 187 | demo() 188 | -------------------------------------------------------------------------------- /demo_gl.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | import time 7 | import matplotlib.pyplot as plt 8 | from mpl_toolkits.mplot3d import Axes3D 9 | import pygame 10 | from pygame.locals import * 11 | from OpenGL.GL import * 12 | from OpenGL.GLU import * 13 | 14 | import utils 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--device', default='gpu') 18 | parser.add_argument('--model_dir', default='/media/tim_ho/HDD1/Projects/VNect-tensorflow/models') 19 | parser.add_argument('--input_size', default=368) 20 | parser.add_argument('--num_of_joints', default=21) 21 | parser.add_argument('--pool_scale', default=8) 22 | parser.add_argument('--plot_2d', default=False) 23 | parser.add_argument('--plot_3d', default=False) 24 | args = parser.parse_args() 25 | 26 | joint_color_code = [[139, 53, 255], 27 | [0, 56, 255], 28 | [43, 140, 237], 29 | [37, 168, 36], 30 | [147, 147, 0], 31 | [70, 17, 145]] 32 | 33 | # Limb parents of each joint 34 | limb_parents = [1, 15, 1, 2, 3, 1, 5, 6, 14, 8, 9, 14, 11, 12, 14, 14, 1, 4, 7, 10, 13] 35 | 36 | # input scales 37 | scales = [1.0, 0.7] 38 | 39 | 40 | def demo(): 41 | joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32) 42 | joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32) 43 | 44 | if args.plot_3d: 45 | plt.ion() 46 | fig = plt.figure() 47 | ax = fig.add_subplot(121, projection='3d') 48 | ax2 = fig.add_subplot(122) 49 | plt.show() 50 | 51 | if args.device == 'cpu': 52 | caffe.set_mode_cpu() 53 | elif args.device == 'gpu': 54 | caffe.set_mode_gpu() 55 | caffe.set_device(0) 56 | else: 57 | raise ValueError('No such device') 58 | 59 | model_prototxt_path = os.path.join(args.model_dir, 'vnect_net.prototxt') 60 | model_weight_path = os.path.join(args.model_dir, 'vnect_model.caffemodel') 61 | 62 | # Load model 63 | model = caffe.Net(model_prototxt_path, 64 | model_weight_path, 65 | caffe.TEST) 66 | 67 | # Show network structure and shape 68 | for layer_name in model.params.keys(): 69 | print(layer_name, model.params[layer_name][0].data.shape) 70 | print('') 71 | 72 | for i in model.blobs.keys(): 73 | print(i, model.blobs[i].data.shape) 74 | 75 | cam = cv2.VideoCapture(0) 76 | is_tracking = False 77 | # for img_name in os.listdir('test_imgs'): 78 | while True: 79 | # if not is_tracking: 80 | 81 | img_path = 'test_imgs/{}'.format('dance.jpg') 82 | t1 = time.time() 83 | input_batch = [] 84 | 85 | cam_img = utils.read_square_image('', cam, args.input_size, 'WEBCAM') 86 | # cam_img = utils.read_square_image(img_path, '', args.input_size, 'IMAGE') 87 | # cv2.imshow('', cam_img) 88 | # cv2.waitKey(0) 89 | orig_size_input = cam_img.astype(np.float32) 90 | 91 | for scale in scales: 92 | resized_img = utils.resize_pad_img(orig_size_input, scale, args.input_size) 93 | input_batch.append(resized_img) 94 | 95 | input_batch = np.asarray(input_batch, dtype=np.float32) 96 | input_batch = np.transpose(input_batch, (0, 3, 1, 2)) 97 | input_batch /= 255.0 98 | input_batch -= 0.4 99 | 100 | model.blobs['data'].data[...] 
= input_batch 101 | 102 | # Forward 103 | model.forward() 104 | 105 | # Get output data 106 | x_hm = model.blobs['x_heatmap'].data 107 | y_hm = model.blobs['y_heatmap'].data 108 | z_hm = model.blobs['z_heatmap'].data 109 | hm = model.blobs['heatmap'].data 110 | 111 | # Trans coordinates 112 | x_hm = x_hm.transpose([0, 2, 3, 1]) 113 | y_hm = y_hm.transpose([0, 2, 3, 1]) 114 | z_hm = z_hm.transpose([0, 2, 3, 1]) 115 | hm = hm.transpose([0, 2, 3, 1]) 116 | 117 | # Average scale outputs 118 | hm_size = args.input_size // args.pool_scale 119 | hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 120 | x_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 121 | y_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 122 | z_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 123 | for i in range(len(scales)): 124 | rescale = 1.0 / scales[i] 125 | scaled_hm = cv2.resize(hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 126 | scaled_x_hm = cv2.resize(x_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 127 | scaled_y_hm = cv2.resize(y_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 128 | scaled_z_hm = cv2.resize(z_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 129 | mid = [scaled_hm.shape[0] // 2, scaled_hm.shape[1] // 2] 130 | hm_avg += scaled_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 131 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 132 | x_hm_avg += scaled_x_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 133 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 134 | y_hm_avg += scaled_y_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 135 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 136 | z_hm_avg += scaled_z_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 137 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 138 | hm_avg /= len(scales) 139 | x_hm_avg /= len(scales) 140 | y_hm_avg /= len(scales) 141 | z_hm_avg /= len(scales) 142 | 143 | t2 = time.time() 144 | # Get 2d joints 145 | joints_2d = utils.extract_2d_joint_from_heatmap(hm_avg, args.input_size, joints_2d) 146 | 147 | # Get 3d joints 148 | joints_3d = utils.extract_3d_joints_from_heatmap(joints_2d, x_hm_avg, y_hm_avg, z_hm_avg, args.input_size, 149 | joints_3d) 150 | print('Post FPS', 1/(time.time()-t2)) 151 | 152 | # Plot 2d location heatmap 153 | joint_map = np.zeros(shape=(args.input_size, args.input_size, 3)) 154 | for joint_num in range(joints_2d.shape[0]): 155 | cv2.circle(joint_map, center=(joints_2d[joint_num][1], joints_2d[joint_num][0]), radius=3, 156 | color=(255, 0, 0), thickness=-1) 157 | 158 | # Plot 2d limbs 159 | limb_img = utils.draw_limbs_2d(cam_img, joints_2d, limb_parents) 160 | 161 | # Plot 3d limbs 162 | if args.plot_3d: 163 | ax.clear() 164 | ax.view_init(azim=0, elev=90) 165 | ax.set_xlim(-700, 700) 166 | ax.set_ylim(-800, 800) 167 | ax.set_zlim(-700, 700) 168 | ax.set_xlabel('x') 169 | ax.set_ylabel('y') 170 | ax.set_zlabel('z') 171 | utils.draw_limbs_3d(joints_3d, limb_parents, ax) 172 | 173 | # draw heatmap 174 | # hm_img = utils.draw_predicted_heatmap(hm_avg*200, args.input_size) 175 | # cv2.imshow('hm', hm_img.astype(np.uint8)) 176 | # cv2.waitKey(0) 177 | 178 | 179 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) 180 | utils.draw_limb_3d_gl(joints_3d, limb_parents) 181 | pygame.display.flip() 182 | pygame.time.wait(1) 183 | 184 | 185 | concat_img = np.concatenate((limb_img, joint_map), axis=1) 186 | 187 | # 
ax2.imshow(concat_img[..., ::-1].astype(np.uint8)) 188 | cv2.imshow('2d', concat_img.astype(np.uint8)) 189 | cv2.waitKey(1) 190 | # ax2.imshow(concat_img.astype(np.uint8)) 191 | # plt.pause(0.0001) 192 | # plt.show(block=False) 193 | print('Forward FPS', 1 / (time.time() - t1)) 194 | 195 | 196 | if __name__ == '__main__': 197 | pygame.init() 198 | display = (800, 600) 199 | pygame.display.set_mode(display, DOUBLEBUF | OPENGL) 200 | 201 | gluPerspective(70, (display[0] / display[1]), 0.1, 200.0) 202 | view_range = 800 203 | # glOrtho(-view_range, view_range, 204 | # -view_range, view_range, 205 | # -view_range, view_range) 206 | 207 | glTranslatef(0.0, 0.0, 100) 208 | 209 | demo() 210 | -------------------------------------------------------------------------------- /vispy_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # ----------------------------------------------------------------------------- 3 | # Copyright (c) 2015, Vispy Development Team. All Rights Reserved. 4 | # Distributed under the (new) BSD License. See LICENSE.txt for more info. 5 | # ----------------------------------------------------------------------------- 6 | # Author: Nicolas P .Rougier 7 | # Date: 04/03/2014 8 | # ----------------------------------------------------------------------------- 9 | import math 10 | import numpy as np 11 | 12 | from vispy import app 13 | from vispy.gloo import gl 14 | 15 | 16 | def checkerboard(grid_num=8, grid_size=32): 17 | row_even = grid_num // 2 * [0, 1] 18 | row_odd = grid_num // 2 * [1, 0] 19 | Z = np.row_stack(grid_num // 2 * (row_even, row_odd)).astype(np.uint8) 20 | return 255 * Z.repeat(grid_size, axis=0).repeat(grid_size, axis=1) 21 | 22 | 23 | def rotate(M, angle, x, y, z, point=None): 24 | angle = math.pi * angle / 180 25 | c, s = math.cos(angle), math.sin(angle) 26 | n = math.sqrt(x * x + y * y + z * z) 27 | x /= n 28 | y /= n 29 | z /= n 30 | cx, cy, cz = (1 - c) * x, (1 - c) * y, (1 - c) * z 31 | R = np.array([[cx * x + c, cy * x - z * s, cz * x + y * s, 0], 32 | [cx * y + z * s, cy * y + c, cz * y - x * s, 0], 33 | [cx * z - y * s, cy * z + x * s, cz * z + c, 0], 34 | [0, 0, 0, 1]], dtype=M.dtype).T 35 | M[...] = np.dot(M, R) 36 | return M 37 | 38 | 39 | def translate(M, x, y=None, z=None): 40 | y = x if y is None else y 41 | z = x if z is None else z 42 | T = np.array([[1.0, 0.0, 0.0, x], 43 | [0.0, 1.0, 0.0, y], 44 | [0.0, 0.0, 1.0, z], 45 | [0.0, 0.0, 0.0, 1.0]], dtype=M.dtype).T 46 | M[...] 
= np.dot(M, T) 47 | return M 48 | 49 | 50 | def frustum(left, right, bottom, top, znear, zfar): 51 | M = np.zeros((4, 4), dtype=np.float32) 52 | M[0, 0] = +2.0 * znear / (right - left) 53 | M[2, 0] = (right + left) / (right - left) 54 | M[1, 1] = +2.0 * znear / (top - bottom) 55 | M[3, 1] = (top + bottom) / (top - bottom) 56 | M[2, 2] = -(zfar + znear) / (zfar - znear) 57 | M[3, 2] = -2.0 * znear * zfar / (zfar - znear) 58 | M[2, 3] = -1.0 59 | return M 60 | 61 | 62 | def perspective(fovy, aspect, znear, zfar): 63 | h = math.tan(fovy / 360.0 * math.pi) * znear 64 | w = h * aspect 65 | return frustum(-w, w, -h, h, znear, zfar) 66 | 67 | 68 | def makecube(): 69 | """ Generate vertices & indices for a filled cube """ 70 | 71 | vtype = [('a_position', np.float32, 3), 72 | ('a_texcoord', np.float32, 2)] 73 | itype = np.uint32 74 | 75 | # Vertices positions 76 | p = np.array([[1, 1, 1], [-1, 1, 1], [-1, -1, 1], [1, -1, 1], 77 | [1, -1, -1], [1, 1, -1], [-1, 1, -1], [-1, -1, -1]]) 78 | 79 | # Texture coords 80 | t = np.array([[0, 0], [0, 1], [1, 1], [1, 0]]) 81 | 82 | faces_p = [0, 1, 2, 3, 0, 3, 4, 5, 0, 5, 6, 83 | 1, 1, 6, 7, 2, 7, 4, 3, 2, 4, 7, 6, 5] 84 | faces_t = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 85 | 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] 86 | 87 | vertices = np.zeros(24, vtype) 88 | vertices['a_position'] = p[faces_p] 89 | vertices['a_texcoord'] = t[faces_t] 90 | 91 | indices = np.resize( 92 | np.array([0, 1, 2, 0, 2, 3], dtype=itype), 6 * (2 * 3)) 93 | indices += np.repeat(4 * np.arange(6), 6).astype(np.uint32) 94 | 95 | return vertices, indices 96 | 97 | 98 | cube_vertex = """ 99 | uniform mat4 u_model; 100 | uniform mat4 u_view; 101 | uniform mat4 u_projection; 102 | attribute vec3 a_position; 103 | attribute vec2 a_texcoord; 104 | varying vec2 v_texcoord; 105 | void main() 106 | { 107 | gl_Position = u_projection * u_view * u_model * vec4(a_position,1.0); 108 | v_texcoord = a_texcoord; 109 | } 110 | """ 111 | 112 | cube_fragment = """ 113 | uniform sampler2D u_texture; 114 | varying vec2 v_texcoord; 115 | void main() 116 | { 117 | gl_FragColor = texture2D(u_texture, v_texcoord); 118 | } 119 | """ 120 | 121 | 122 | class Canvas(app.Canvas): 123 | def __init__(self): 124 | app.Canvas.__init__(self, size=(512, 512), 125 | title='Rotating cube (GL version)', 126 | keys='interactive') 127 | 128 | def on_initialize(self, event): 129 | # Build & activate cube program 130 | self.cube = gl.glCreateProgram() 131 | vertex = gl.glCreateShader(gl.GL_VERTEX_SHADER) 132 | fragment = gl.glCreateShader(gl.GL_FRAGMENT_SHADER) 133 | gl.glShaderSource(vertex, cube_vertex) 134 | gl.glShaderSource(fragment, cube_fragment) 135 | gl.glCompileShader(vertex) 136 | gl.glCompileShader(fragment) 137 | gl.glAttachShader(self.cube, vertex) 138 | gl.glAttachShader(self.cube, fragment) 139 | gl.glLinkProgram(self.cube) 140 | gl.glDetachShader(self.cube, vertex) 141 | gl.glDetachShader(self.cube, fragment) 142 | gl.glUseProgram(self.cube) 143 | 144 | # Get data & build cube buffers 145 | vcube_data, self.icube_data = makecube() 146 | vcube = gl.glCreateBuffer() 147 | gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vcube) 148 | gl.glBufferData(gl.GL_ARRAY_BUFFER, vcube_data, gl.GL_STATIC_DRAW) 149 | icube = gl.glCreateBuffer() 150 | gl.glBindBuffer(gl.GL_ELEMENT_ARRAY_BUFFER, icube) 151 | gl.glBufferData(gl.GL_ELEMENT_ARRAY_BUFFER, 152 | self.icube_data, gl.GL_STATIC_DRAW) 153 | 154 | # Bind cube attributes 155 | stride = vcube_data.strides[0] 156 | offset = 0 157 | loc = gl.glGetAttribLocation(self.cube, "a_position") 158 | 
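        # (added note) The cube's vertices live in one interleaved VBO built from
        # a structured dtype, so each attribute is described to GL by the record
        # stride plus the byte offset of its field: a_position at offset 0,
        # a_texcoord immediately after the three position floats.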
gl.glEnableVertexAttribArray(loc) 159 | gl.glVertexAttribPointer(loc, 3, gl.GL_FLOAT, False, stride, offset) 160 | 161 | offset = vcube_data.dtype["a_position"].itemsize 162 | loc = gl.glGetAttribLocation(self.cube, "a_texcoord") 163 | gl.glEnableVertexAttribArray(loc) 164 | gl.glVertexAttribPointer(loc, 2, gl.GL_FLOAT, False, stride, offset) 165 | 166 | # Create & bind cube texture 167 | crate = checkerboard() 168 | texture = gl.glCreateTexture() 169 | gl.glTexParameterf(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, 170 | gl.GL_LINEAR) 171 | gl.glTexParameterf(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, 172 | gl.GL_LINEAR) 173 | gl.glTexParameterf(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, 174 | gl.GL_CLAMP_TO_EDGE) 175 | gl.glTexParameterf(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, 176 | gl.GL_CLAMP_TO_EDGE) 177 | gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_LUMINANCE, gl.GL_LUMINANCE, 178 | gl.GL_UNSIGNED_BYTE, crate.shape[:2]) 179 | gl.glTexSubImage2D(gl.GL_TEXTURE_2D, 0, 0, 0, gl.GL_LUMINANCE, 180 | gl.GL_UNSIGNED_BYTE, crate) 181 | loc = gl.glGetUniformLocation(self.cube, "u_texture") 182 | gl.glUniform1i(loc, texture) 183 | gl.glBindTexture(gl.GL_TEXTURE_2D, 0) 184 | 185 | # Create & bind cube matrices 186 | view = np.eye(4, dtype=np.float32) 187 | model = np.eye(4, dtype=np.float32) 188 | projection = np.eye(4, dtype=np.float32) 189 | translate(view, 0, 0, -7) 190 | self.phi, self.theta = 60, 20 191 | rotate(model, self.theta, 0, 0, 1) 192 | rotate(model, self.phi, 0, 1, 0) 193 | loc = gl.glGetUniformLocation(self.cube, "u_model") 194 | gl.glUniformMatrix4fv(loc, 1, False, model) 195 | loc = gl.glGetUniformLocation(self.cube, "u_view") 196 | gl.glUniformMatrix4fv(loc, 1, False, view) 197 | loc = gl.glGetUniformLocation(self.cube, "u_projection") 198 | gl.glUniformMatrix4fv(loc, 1, False, projection) 199 | 200 | # OpenGL initalization 201 | gl.glClearColor(0.30, 0.30, 0.35, 1.00) 202 | gl.glEnable(gl.GL_DEPTH_TEST) 203 | self._resize(*(self.size + self.physical_size)) 204 | self.timer = app.Timer('auto', self.on_timer, start=True) 205 | 206 | def on_draw(self, event): 207 | gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT) 208 | gl.glDrawElements(gl.GL_TRIANGLES, self.icube_data.size, 209 | gl.GL_UNSIGNED_INT, None) 210 | 211 | def on_resize(self, event): 212 | self._resize(*(event.size + event.physical_size)) 213 | 214 | def _resize(self, width, height, physical_width, physical_height): 215 | gl.glViewport(0, 0, physical_width, physical_height) 216 | projection = perspective(35.0, width / float(height), 2.0, 10.0) 217 | loc = gl.glGetUniformLocation(self.cube, "u_projection") 218 | gl.glUniformMatrix4fv(loc, 1, False, projection) 219 | 220 | def on_timer(self, event): 221 | self.theta += .5 222 | self.phi += .5 223 | model = np.eye(4, dtype=np.float32) 224 | rotate(model, self.theta, 0, 0, 1) 225 | rotate(model, self.phi, 0, 1, 0) 226 | loc = gl.glGetUniformLocation(self.cube, "u_model") 227 | gl.glUniformMatrix4fv(loc, 1, False, model) 228 | self.update() 229 | 230 | if __name__ == '__main__': 231 | c = Canvas() 232 | c.show() 233 | app.run() -------------------------------------------------------------------------------- /demo_tf_gl.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import cv2 5 | import numpy as np 6 | import tensorflow as tf 7 | import pygame 8 | from pygame.locals import * 9 | from OpenGL.GL import * 10 | from OpenGL.GLU import * 11 | 12 | from models.nets import 
vnect_model_bn_folded as vnect_model 13 | import utils.utils as utils 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--device', default='gpu') 17 | parser.add_argument('--demo_type', default='image') 18 | parser.add_argument('--model_file', default='models/weights/vnect_tf') 19 | parser.add_argument('--test_img', default='test_imgs/yuniko.jpg') 20 | parser.add_argument('--input_size', default=368) 21 | parser.add_argument('--num_of_joints', default=21) 22 | parser.add_argument('--pool_scale', default=8) 23 | parser.add_argument('--plot_2d', default=True) 24 | parser.add_argument('--plot_3d', default=True) 25 | args = parser.parse_args() 26 | 27 | joint_color_code = [[139, 53, 255], 28 | [0, 56, 255], 29 | [43, 140, 237], 30 | [37, 168, 36], 31 | [147, 147, 0], 32 | [70, 17, 145]] 33 | 34 | # Limb parents of each joint 35 | limb_parents = [1, 15, 1, 2, 3, 1, 5, 6, 14, 8, 9, 14, 11, 12, 14, 14, 1, 4, 7, 10, 13] 36 | 37 | # input scales 38 | scales = [1.0, 0.7] 39 | 40 | # Use gpu or cpu 41 | gpu_count = {'GPU':1} if args.device == 'gpu' else {'GPU':0} 42 | 43 | 44 | def demo_single_image(): 45 | # Create model 46 | model_tf = vnect_model.VNect(args.input_size) 47 | 48 | # Create session 49 | sess_config = tf.ConfigProto(device_count=gpu_count) 50 | sess = tf.Session(config=sess_config) 51 | 52 | # Restore weights 53 | saver = tf.train.Saver() 54 | saver.restore(sess, args.model_file) 55 | 56 | # Joints placeholder 57 | joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32) 58 | joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32) 59 | 60 | 61 | img_path = args.test_img 62 | t1 = time.time() 63 | input_batch = [] 64 | 65 | cam_img = utils.read_square_image(img_path, '', args.input_size, 'IMAGE') 66 | orig_size_input = cam_img.astype(np.float32) 67 | 68 | # Create multi-scale inputs 69 | for scale in scales: 70 | resized_img = utils.resize_pad_img(orig_size_input, scale, args.input_size) 71 | input_batch.append(resized_img) 72 | 73 | input_batch = np.asarray(input_batch, dtype=np.float32) 74 | input_batch /= 255.0 75 | input_batch -= 0.4 76 | 77 | # Inference 78 | [hm, x_hm, y_hm, z_hm] = sess.run( 79 | [model_tf.heapmap, model_tf.x_heatmap, model_tf.y_heatmap, model_tf.z_heatmap], 80 | feed_dict={model_tf.input_holder: input_batch}) 81 | 82 | # Average scale outputs 83 | hm_size = args.input_size // args.pool_scale 84 | hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 85 | x_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 86 | y_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 87 | z_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 88 | for i in range(len(scales)): 89 | rescale = 1.0 / scales[i] 90 | scaled_hm = cv2.resize(hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 91 | scaled_x_hm = cv2.resize(x_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 92 | scaled_y_hm = cv2.resize(y_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 93 | scaled_z_hm = cv2.resize(z_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 94 | mid = [scaled_hm.shape[0] // 2, scaled_hm.shape[1] // 2] 95 | hm_avg += scaled_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 96 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 97 | x_hm_avg += scaled_x_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 98 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 99 | y_hm_avg += scaled_y_hm[mid[0] - 
hm_size // 2: mid[0] + hm_size // 2, 100 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 101 | z_hm_avg += scaled_z_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 102 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 103 | hm_avg /= len(scales) 104 | x_hm_avg /= len(scales) 105 | y_hm_avg /= len(scales) 106 | z_hm_avg /= len(scales) 107 | 108 | # Get 2d joints 109 | utils.extract_2d_joint_from_heatmap(hm_avg, args.input_size, joints_2d) 110 | 111 | # Get 3d joints 112 | utils.extract_3d_joints_from_heatmap(joints_2d, x_hm_avg, y_hm_avg, z_hm_avg, args.input_size, joints_3d) 113 | 114 | if args.plot_2d: 115 | # Plot 2d joint location 116 | joint_map = np.zeros(shape=(args.input_size, args.input_size, 3)) 117 | for joint_num in range(joints_2d.shape[0]): 118 | cv2.circle(joint_map, center=(joints_2d[joint_num][1], joints_2d[joint_num][0]), radius=3, 119 | color=(255, 0, 0), thickness=-1) 120 | # Draw 2d limbs 121 | utils.draw_limbs_2d(cam_img, joints_2d, limb_parents) 122 | 123 | 124 | print('FPS: {:>2.2f}'.format(1 / (time.time() - t1))) 125 | 126 | if args.plot_3d: 127 | # Draw 3d limbs 128 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) 129 | utils.draw_limbs_3d_gl(joints_3d, limb_parents) 130 | pygame.display.flip() 131 | pygame.time.wait(1) 132 | 133 | if args.plot_2d: 134 | # Display 2d results 135 | concat_img = np.concatenate((cam_img, joint_map), axis=1) 136 | cv2.imshow('2D', concat_img.astype(np.uint8)) 137 | cv2.waitKey(0) 138 | 139 | 140 | 141 | 142 | def demo_webcam(): 143 | # Create model 144 | model_tf = vnect_model.VNect(args.input_size) 145 | 146 | # Create session 147 | sess_config = tf.ConfigProto(device_count=gpu_count) 148 | sess = tf.Session(config=sess_config) 149 | 150 | # Restore weights 151 | saver = tf.train.Saver() 152 | saver.restore(sess, args.model_file) 153 | 154 | # Joints placeholder 155 | joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32) 156 | joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32) 157 | 158 | cam = cv2.VideoCapture(0) 159 | 160 | while True: 161 | t1 = time.time() 162 | input_batch = [] 163 | 164 | cam_img = utils.read_square_image('', cam, args.input_size, 'WEBCAM') 165 | orig_size_input = cam_img.astype(np.float32) 166 | 167 | # Create multi-scale inputs 168 | for scale in scales: 169 | resized_img = utils.resize_pad_img(orig_size_input, scale, args.input_size) 170 | input_batch.append(resized_img) 171 | 172 | input_batch = np.asarray(input_batch, dtype=np.float32) 173 | input_batch /= 255.0 174 | input_batch -= 0.4 175 | 176 | # Inference 177 | [hm, x_hm, y_hm, z_hm] = sess.run( 178 | [model_tf.heapmap, model_tf.x_heatmap, model_tf.y_heatmap, model_tf.z_heatmap], 179 | feed_dict={model_tf.input_holder: input_batch}) 180 | 181 | # Average scale outputs 182 | hm_size = args.input_size // args.pool_scale 183 | hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 184 | x_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 185 | y_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 186 | z_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 187 | for i in range(len(scales)): 188 | rescale = 1.0 / scales[i] 189 | scaled_hm = cv2.resize(hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 190 | scaled_x_hm = cv2.resize(x_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 191 | scaled_y_hm = cv2.resize(y_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 192 | scaled_z_hm 
= cv2.resize(z_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 193 | mid = [scaled_hm.shape[0] // 2, scaled_hm.shape[1] // 2] 194 | hm_avg += scaled_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 195 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 196 | x_hm_avg += scaled_x_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 197 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 198 | y_hm_avg += scaled_y_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 199 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 200 | z_hm_avg += scaled_z_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 201 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 202 | hm_avg /= len(scales) 203 | x_hm_avg /= len(scales) 204 | y_hm_avg /= len(scales) 205 | z_hm_avg /= len(scales) 206 | 207 | # Get 2d joints 208 | utils.extract_2d_joint_from_heatmap(hm_avg, args.input_size, joints_2d) 209 | 210 | # Get 3d joints 211 | utils.extract_3d_joints_from_heatmap(joints_2d, x_hm_avg, y_hm_avg, z_hm_avg, args.input_size, joints_3d) 212 | 213 | if args.plot_2d: 214 | # Plot 2d joint location 215 | joint_map = np.zeros(shape=(args.input_size, args.input_size, 3)) 216 | for joint_num in range(joints_2d.shape[0]): 217 | cv2.circle(joint_map, center=(joints_2d[joint_num][1], joints_2d[joint_num][0]), radius=3, 218 | color=(255, 0, 0), thickness=-1) 219 | # Draw 2d limbs 220 | utils.draw_limbs_2d(cam_img, joints_2d, limb_parents) 221 | 222 | print('FPS: {:>2.2f}'.format(1 / (time.time() - t1))) 223 | 224 | if args.plot_3d: 225 | # Draw 3d limbs 226 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) 227 | utils.draw_limbs_3d_gl(joints_3d, limb_parents) 228 | pygame.display.flip() 229 | pygame.time.wait(1) 230 | 231 | if args.plot_2d: 232 | # Display 2d results 233 | concat_img = np.concatenate((cam_img, joint_map), axis=1) 234 | cv2.imshow('2D', concat_img.astype(np.uint8)) 235 | if cv2.waitKey(1) == ord('q'): break 236 | 237 | 238 | 239 | if __name__ == '__main__': 240 | # GL initiation 241 | pygame.init() 242 | display = (800, 600) 243 | 244 | glMatrixMode(GL_PROJECTION) 245 | glLoadIdentity() 246 | pygame.display.set_mode(display, DOUBLEBUF | OPENGL) 247 | gluPerspective(70, (display[0] / display[1]), 0.1, 800.0) 248 | glMatrixMode(GL_MODELVIEW) 249 | gluLookAt(-.0, 0.0, -200.0, 250 | 5.0, 0.0, 0.0, 251 | -5.0, -1.0, -10.0) 252 | 253 | if args.demo_type == 'image': 254 | demo_single_image() 255 | elif args.demo_type == 'webcam': 256 | demo_webcam() 257 | -------------------------------------------------------------------------------- /demo_tf.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import cv2 5 | import numpy as np 6 | import tensorflow as tf 7 | import matplotlib.pyplot as plt 8 | from mpl_toolkits.mplot3d import Axes3D 9 | 10 | from models.nets import vnect_model_bn_folded as vnect_model 11 | import utils.utils as utils 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--demo_type', default='image') 15 | parser.add_argument('--device', default='cpu') 16 | parser.add_argument('--model_file', default='models/weights/vnect_tf') 17 | parser.add_argument('--test_img', default='test_imgs/yuniko.jpg') 18 | parser.add_argument('--input_size', default=368) 19 | parser.add_argument('--num_of_joints', default=21) 20 | parser.add_argument('--pool_scale', default=8) 21 | parser.add_argument('--plot_2d', default=True) 22 | parser.add_argument('--plot_3d', default=True) 23 | args = parser.parse_args() 
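
# Note (added sketch, not part of the original script): argparse receives
# --plot_2d / --plot_3d as strings from the shell scripts, and a non-empty
# string such as 'False' is truthy. Normalizing them here makes
# '--plot_2d=False' actually disable plotting:
if isinstance(args.plot_2d, str):
    args.plot_2d = args.plot_2d.lower() == 'true'
if isinstance(args.plot_3d, str):
    args.plot_3d = args.plot_3d.lower() == 'true'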
24 | 25 | joint_color_code = [[139, 53, 255], 26 | [0, 56, 255], 27 | [43, 140, 237], 28 | [37, 168, 36], 29 | [147, 147, 0], 30 | [70, 17, 145]] 31 | 32 | # Limb parents of each joint 33 | limb_parents = [1, 15, 1, 2, 3, 1, 5, 6, 14, 8, 9, 14, 11, 12, 14, 14, 1, 4, 7, 10, 13] 34 | 35 | # input scales 36 | scales = [1.0, 0.7] 37 | 38 | # Use gpu or cpu 39 | gpu_count = {'GPU':1} if args.device == 'gpu' else {'GPU':0} 40 | 41 | 42 | def demo_single_image(): 43 | if args.plot_3d: 44 | plt.ion() 45 | fig = plt.figure() 46 | ax = fig.add_subplot(121, projection='3d') 47 | ax2 = fig.add_subplot(122) 48 | plt.show() 49 | 50 | # Create model 51 | model_tf = vnect_model.VNect(args.input_size) 52 | 53 | # Create session 54 | sess_config = tf.ConfigProto(device_count=gpu_count) 55 | sess = tf.Session(config=sess_config) 56 | 57 | # Restore weights 58 | saver = tf.train.Saver() 59 | saver.restore(sess, args.model_file) 60 | 61 | # Joints placeholder 62 | joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32) 63 | joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32) 64 | 65 | 66 | img_path = args.test_img 67 | t1 = time.time() 68 | input_batch = [] 69 | 70 | cam_img = utils.read_square_image(img_path, '', args.input_size, 'IMAGE') 71 | orig_size_input = cam_img.astype(np.float32) 72 | 73 | # Create multi-scale inputs 74 | for scale in scales: 75 | resized_img = utils.resize_pad_img(orig_size_input, scale, args.input_size) 76 | input_batch.append(resized_img) 77 | 78 | input_batch = np.asarray(input_batch, dtype=np.float32) 79 | input_batch /= 255.0 80 | input_batch -= 0.4 81 | 82 | # Inference 83 | [hm, x_hm, y_hm, z_hm] = sess.run( 84 | [model_tf.heapmap, model_tf.x_heatmap, model_tf.y_heatmap, model_tf.z_heatmap], 85 | feed_dict={model_tf.input_holder: input_batch}) 86 | 87 | # Average scale outputs 88 | hm_size = args.input_size // args.pool_scale 89 | hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 90 | x_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 91 | y_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 92 | z_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints)) 93 | for i in range(len(scales)): 94 | rescale = 1.0 / scales[i] 95 | scaled_hm = cv2.resize(hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 96 | scaled_x_hm = cv2.resize(x_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 97 | scaled_y_hm = cv2.resize(y_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 98 | scaled_z_hm = cv2.resize(z_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR) 99 | mid = [scaled_hm.shape[0] // 2, scaled_hm.shape[1] // 2] 100 | hm_avg += scaled_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 101 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 102 | x_hm_avg += scaled_x_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 103 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 104 | y_hm_avg += scaled_y_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 105 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 106 | z_hm_avg += scaled_z_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2, 107 | mid[1] - hm_size // 2: mid[1] + hm_size // 2, :] 108 | hm_avg /= len(scales) 109 | x_hm_avg /= len(scales) 110 | y_hm_avg /= len(scales) 111 | z_hm_avg /= len(scales) 112 | 113 | # Get 2d joints 114 | utils.extract_2d_joint_from_heatmap(hm_avg, args.input_size, joints_2d) 115 | 116 | # Get 3d joints 117 | 
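    # (added note) x_hm / y_hm / z_hm are VNect's location maps: each joint's 3D
    # coordinate is read out at its 2D argmax position, rescaled from input
    # resolution to heatmap resolution (pool_scale = 8), and the skeleton is then
    # root-centered on joint 14 inside utils.extract_3d_joints_from_heatmap.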
116 |     # Get 3d joints
117 |     utils.extract_3d_joints_from_heatmap(joints_2d, x_hm_avg, y_hm_avg, z_hm_avg, args.input_size, joints_3d)
118 |
119 |     if args.plot_2d:
120 |         # Plot 2d joint location
121 |         joint_map = np.zeros(shape=(args.input_size, args.input_size, 3))
122 |         for joint_num in range(joints_2d.shape[0]):
123 |             cv2.circle(joint_map, center=(joints_2d[joint_num][1], joints_2d[joint_num][0]), radius=3,
124 |                        color=(255, 0, 0), thickness=-1)
125 |         # Draw 2d limbs
126 |         utils.draw_limbs_2d(cam_img, joints_2d, limb_parents)
127 |
128 |
129 |     print('FPS: {:>2.2f}'.format(1 / (time.time() - t1)))
130 |
131 |     if args.plot_3d:
132 |         ax.clear()
133 |         ax.view_init(azim=0, elev=90)
134 |         ax.set_xlim(-50, 50)
135 |         ax.set_ylim(-50, 50)
136 |         ax.set_zlim(-50, 50)
137 |         ax.set_xlabel('x')
138 |         ax.set_ylabel('y')
139 |         ax.set_zlabel('z')
140 |         utils.draw_limbs_3d(joints_3d, limb_parents, ax)
141 |
142 |         if args.plot_2d:
143 |             # Display 2d results
144 |             concat_img = np.concatenate((cam_img[:, :, ::-1], joint_map), axis=1)
145 |             ax2.imshow(concat_img.astype(np.uint8))
146 |         plt.pause(100000)
147 |         plt.show(block=False)
148 |
149 |     elif args.plot_2d:
150 |         concat_img = np.concatenate((cam_img, joint_map), axis=1)
151 |         cv2.imshow('2D img', concat_img.astype(np.uint8))
152 |         cv2.waitKey(1)
153 |
154 |
155 |
156 | def demo_webcam():
157 |     if args.plot_3d:
158 |         plt.ion()
159 |         fig = plt.figure()
160 |         ax = fig.add_subplot(121, projection='3d')
161 |         ax2 = fig.add_subplot(122)
162 |         plt.show()
163 |
164 |     # Create model
165 |     model_tf = vnect_model.VNect(args.input_size)
166 |
167 |     # Create session
168 |     sess_config = tf.ConfigProto(device_count=gpu_count)
169 |     sess = tf.Session(config=sess_config)
170 |
171 |     # Restore weights
172 |     saver = tf.train.Saver()
173 |     saver.restore(sess, args.model_file)
174 |
175 |     # Joints placeholder
176 |     joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32)
177 |     joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32)
178 |
179 |     cam = cv2.VideoCapture(0)
180 |
181 |     while True:
182 |         t1 = time.time()
183 |         input_batch = []
184 |
185 |         cam_img = utils.read_square_image('', cam, args.input_size, 'WEBCAM')
186 |         orig_size_input = cam_img.astype(np.float32)
187 |
188 |         # Create multi-scale inputs
189 |         for scale in scales:
190 |             resized_img = utils.resize_pad_img(orig_size_input, scale, args.input_size)
191 |             input_batch.append(resized_img)
192 |
193 |         input_batch = np.asarray(input_batch, dtype=np.float32)
194 |         input_batch /= 255.0
195 |         input_batch -= 0.4
196 |
197 |         # Inference
198 |         [hm, x_hm, y_hm, z_hm] = sess.run(
199 |             [model_tf.heapmap, model_tf.x_heatmap, model_tf.y_heatmap, model_tf.z_heatmap],
200 |             feed_dict={model_tf.input_holder: input_batch})
201 |
202 |         # Average scale outputs
203 |         hm_size = args.input_size // args.pool_scale
204 |         hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
205 |         x_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
206 |         y_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
207 |         z_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
208 |         for i in range(len(scales)):
209 |             rescale = 1.0 / scales[i]
210 |             scaled_hm = cv2.resize(hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR)
211 |             scaled_x_hm = cv2.resize(x_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR)
212 |             scaled_y_hm = cv2.resize(y_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR)
213 |             scaled_z_hm = cv2.resize(z_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR)
214 |             mid = [scaled_hm.shape[0] // 2, scaled_hm.shape[1] // 2]
215 |             hm_avg += scaled_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2,
216 |                                 mid[1] - hm_size // 2: mid[1] + hm_size // 2, :]
217 |             x_hm_avg += scaled_x_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2,
218 |                                     mid[1] - hm_size // 2: mid[1] + hm_size // 2, :]
219 |             y_hm_avg += scaled_y_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2,
220 |                                     mid[1] - hm_size // 2: mid[1] + hm_size // 2, :]
221 |             z_hm_avg += scaled_z_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2,
222 |                                     mid[1] - hm_size // 2: mid[1] + hm_size // 2, :]
223 |         hm_avg /= len(scales)
224 |         x_hm_avg /= len(scales)
225 |         y_hm_avg /= len(scales)
226 |         z_hm_avg /= len(scales)
227 |
228 |         # Get 2d joints
229 |         utils.extract_2d_joint_from_heatmap(hm_avg, args.input_size, joints_2d)
230 |
231 |         # Get 3d joints
232 |         utils.extract_3d_joints_from_heatmap(joints_2d, x_hm_avg, y_hm_avg, z_hm_avg, args.input_size, joints_3d)
233 |
234 |         if args.plot_2d:
235 |             # Plot 2d joint location
236 |             joint_map = np.zeros(shape=(args.input_size, args.input_size, 3))
237 |             for joint_num in range(joints_2d.shape[0]):
238 |                 cv2.circle(joint_map, center=(joints_2d[joint_num][1], joints_2d[joint_num][0]), radius=3,
239 |                            color=(255, 0, 0), thickness=-1)
240 |             # Draw 2d limbs
241 |             utils.draw_limbs_2d(cam_img, joints_2d, limb_parents)
242 |
243 |         if args.plot_3d:
244 |             ax.clear()
245 |             ax.view_init(azim=0, elev=90)
246 |             ax.set_xlim(-50, 50)
247 |             ax.set_ylim(-50, 50)
248 |             ax.set_zlim(-50, 50)
249 |             ax.set_xlabel('x')
250 |             ax.set_ylabel('y')
251 |             ax.set_zlabel('z')
252 |             utils.draw_limbs_3d(joints_3d, limb_parents, ax)
253 |
254 |             if args.plot_2d:
255 |                 # Display 2d results
256 |                 concat_img = np.concatenate((cam_img[:, :, ::-1], joint_map), axis=1)
257 |                 ax2.imshow(concat_img.astype(np.uint8))
258 |             plt.pause(0.00001)
259 |             plt.show(block=False)
260 |
261 |         elif args.plot_2d:
262 |             concat_img = np.concatenate((cam_img, joint_map), axis=1)
263 |             cv2.imshow('2D img', concat_img.astype(np.uint8))
264 |             if cv2.waitKey(1) == ord('q'): break
265 |
266 |         print('FPS: {:>2.2f}'.format(1 / (time.time() - t1)))
267 |
268 |
269 |
270 | if __name__ == '__main__':
271 |
272 |     if args.demo_type == 'image':
273 |         demo_single_image()
274 |     elif args.demo_type == 'webcam':
275 |         demo_webcam()
276 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 |
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 |
  7 |    1. Definitions.
  8 |
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 |
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 |
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 |
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 |
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 |
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 |
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 |
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 |
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 |
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 |
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 |
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 |
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 |
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 |
176 |    END OF TERMS AND CONDITIONS
177 |
178 |    APPENDIX: How to apply the Apache License to your work.
179 |
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!) The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 |
189 |    Copyright [yyyy] [name of copyright owner]
190 |
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 |
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 |
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 |
--------------------------------------------------------------------------------
/models/vnect_model.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import tensorflow.contrib as tc
  3 |
  4 | import pickle
  5 | import numpy as np
  6 |
  7 |
  8 | class VNect():
  9 |     def __init__(self, input_size):
 10 |         self.is_training = False
 11 |         self.input_holder = tf.placeholder(dtype=tf.float32,
 12 |                                            shape=(None, input_size, input_size, 3))
 13 |         self._create_network()
 14 |
 15 |     def _create_network(self):
 16 |         # Conv
 17 |         self.conv1 = tc.layers.conv2d(self.input_holder, kernel_size=7, num_outputs=64, stride=2, scope='conv1')
 18 |         self.pool1 = tc.layers.max_pool2d(self.conv1, kernel_size=3, padding='same', scope='pool1')
 19 |
 20 |         # Residual block 2a
 21 |         self.res2a_branch2a = tc.layers.conv2d(self.pool1, kernel_size=1, num_outputs=64, scope='res2a_branch2a')
 22 |         self.res2a_branch2b = tc.layers.conv2d(self.res2a_branch2a, kernel_size=3, num_outputs=64, scope='res2a_branch2b')
 23 |         self.res2a_branch2c = tc.layers.conv2d(self.res2a_branch2b, kernel_size=1, num_outputs=256, activation_fn=None, scope='res2a_branch2c')
 24 |         self.res2a_branch1 = tc.layers.conv2d(self.pool1, kernel_size=1, num_outputs=256, activation_fn=None, scope='res2a_branch1')
 25 |         self.res2a = tf.add(self.res2a_branch2c, self.res2a_branch1, name='res2a_add')
 26 |         self.res2a = tf.nn.relu(self.res2a, name='res2a')
 27 |
 28 |         # Residual block 2b
 29 |         self.res2b_branch2a = tc.layers.conv2d(self.res2a, kernel_size=1, num_outputs=64, scope='res2b_branch2a')
 30 |         self.res2b_branch2b = tc.layers.conv2d(self.res2b_branch2a, kernel_size=3, num_outputs=64, scope='res2b_branch2b')
 31 |         self.res2b_branch2c = tc.layers.conv2d(self.res2b_branch2b, kernel_size=1, num_outputs=256, activation_fn=None, scope='res2b_branch2c')
 32 |         self.res2b = tf.add(self.res2b_branch2c, self.res2a, name='res2b_add')
 33 |         self.res2b = tf.nn.relu(self.res2b, name='res2b')
 34 |
 35 |         # Residual block 2c
 36 |         self.res2c_branch2a = tc.layers.conv2d(self.res2b, kernel_size=1, num_outputs=64, scope='res2c_branch2a')
 37 |         self.res2c_branch2b = tc.layers.conv2d(self.res2c_branch2a, kernel_size=3, num_outputs=64, scope='res2c_branch2b')
 38 |         self.res2c_branch2c = tc.layers.conv2d(self.res2c_branch2b, kernel_size=1, num_outputs=256, activation_fn=None, scope='res2c_branch2c')
 39 |         self.res2c = tf.add(self.res2c_branch2c, self.res2b, name='res2c_add')
 40 |         self.res2c = tf.nn.relu(self.res2c, name='res2c')
 41 |
 42 |         # Residual block 3a
 43 |         self.res3a_branch2a = tc.layers.conv2d(self.res2c, kernel_size=1, num_outputs=128, stride=2, scope='res3a_branch2a')
 44 |         self.res3a_branch2b = tc.layers.conv2d(self.res3a_branch2a, kernel_size=3, num_outputs=128, scope='res3a_branch2b')
 45 |         self.res3a_branch2c = tc.layers.conv2d(self.res3a_branch2b, kernel_size=1, num_outputs=512, activation_fn=None, scope='res3a_branch2c')
 46 |         self.res3a_branch1 = tc.layers.conv2d(self.res2c, kernel_size=1, num_outputs=512, activation_fn=None, stride=2, scope='res3a_branch1')
 47 |         self.res3a = tf.add(self.res3a_branch2c, self.res3a_branch1, name='res3a_add')
 48 |         self.res3a = tf.nn.relu(self.res3a, name='res3a')
 49 |
 50 |         # Residual block 3b
 51 |         self.res3b_branch2a = tc.layers.conv2d(self.res3a, kernel_size=1, num_outputs=128, scope='res3b_branch2a')
 52 |         self.res3b_branch2b = tc.layers.conv2d(self.res3b_branch2a, kernel_size=3, num_outputs=128, scope='res3b_branch2b')
 53 |         self.res3b_branch2c = tc.layers.conv2d(self.res3b_branch2b, kernel_size=1, num_outputs=512, activation_fn=None, scope='res3b_branch2c')
 54 |         self.res3b = tf.add(self.res3b_branch2c, self.res3a, name='res3b_add')
 55 |         self.res3b = tf.nn.relu(self.res3b, name='res3b')
 56 |
 57 |         # Residual block 3c
 58 |         self.res3c_branch2a = tc.layers.conv2d(self.res3b, kernel_size=1, num_outputs=128, scope='res3c_branch2a')
 59 |         self.res3c_branch2b = tc.layers.conv2d(self.res3c_branch2a, kernel_size=3, num_outputs=128, scope='res3c_branch2b')
 60 |         self.res3c_branch2c = tc.layers.conv2d(self.res3c_branch2b, kernel_size=1, num_outputs=512, activation_fn=None, scope='res3c_branch2c')
 61 |         self.res3c = tf.add(self.res3c_branch2c, self.res3b, name='res3c_add')
 62 |         self.res3c = tf.nn.relu(self.res3c, name='res3c')
 63 |
 64 |         # Residual block 3d
 65 |         self.res3d_branch2a = tc.layers.conv2d(self.res3c, kernel_size=1, num_outputs=128, scope='res3d_branch2a')
 66 |         self.res3d_branch2b = tc.layers.conv2d(self.res3d_branch2a, kernel_size=3, num_outputs=128, scope='res3d_branch2b')
 67 |         self.res3d_branch2c = tc.layers.conv2d(self.res3d_branch2b, kernel_size=1, num_outputs=512, activation_fn=None, scope='res3d_branch2c')
 68 |         self.res3d = tf.add(self.res3d_branch2c, self.res3c, name='res3d_add')
 69 |         self.res3d = tf.nn.relu(self.res3d, name='res3d')
 70 |
 71 |         # Residual block 4a
 72 |         self.res4a_branch2a = tc.layers.conv2d(self.res3d, kernel_size=1, num_outputs=256, stride=2, scope='res4a_branch2a')
 73 |         self.res4a_branch2b = tc.layers.conv2d(self.res4a_branch2a, kernel_size=3, num_outputs=256, scope='res4a_branch2b')
 74 |         self.res4a_branch2c = tc.layers.conv2d(self.res4a_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4a_branch2c')
 75 |         self.res4a_branch1 = tc.layers.conv2d(self.res3d, kernel_size=1, num_outputs=1024, activation_fn=None, stride=2, scope='res4a_branch1')
 76 |         self.res4a = tf.add(self.res4a_branch2c, self.res4a_branch1, name='res4a_add')
 77 |         self.res4a = tf.nn.relu(self.res4a, name='res4a')
 78 |
 79 |         # Residual block 4b
 80 |         self.res4b_branch2a = tc.layers.conv2d(self.res4a, kernel_size=1, num_outputs=256, scope='res4b_branch2a')
 81 |         self.res4b_branch2b = tc.layers.conv2d(self.res4b_branch2a, kernel_size=3, num_outputs=256, scope='res4b_branch2b')
 82 |         self.res4b_branch2c = tc.layers.conv2d(self.res4b_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4b_branch2c')
 83 |         self.res4b = tf.add(self.res4b_branch2c, self.res4a, name='res4b_add')
 84 |         self.res4b = tf.nn.relu(self.res4b, name='res4b')
 85 |
 86 |         # Residual block 4c
 87 |         self.res4c_branch2a = tc.layers.conv2d(self.res4b, kernel_size=1, num_outputs=256, scope='res4c_branch2a')
 88 |         self.res4c_branch2b = tc.layers.conv2d(self.res4c_branch2a, kernel_size=3, num_outputs=256, scope='res4c_branch2b')
 89 |         self.res4c_branch2c = tc.layers.conv2d(self.res4c_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4c_branch2c')
 90 |         self.res4c = tf.add(self.res4c_branch2c, self.res4b, name='res4c_add')
 91 |         self.res4c = tf.nn.relu(self.res4c, name='res4c')
 92 |
 93 |         # Residual block 4d
 94 |         self.res4d_branch2a = tc.layers.conv2d(self.res4c, kernel_size=1, num_outputs=256, scope='res4d_branch2a')
 95 |         self.res4d_branch2b = tc.layers.conv2d(self.res4d_branch2a, kernel_size=3, num_outputs=256, scope='res4d_branch2b')
 96 |         self.res4d_branch2c = tc.layers.conv2d(self.res4d_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4d_branch2c')
 97 |         self.res4d = tf.add(self.res4d_branch2c, self.res4c, name='res4d_add')
 98 |         self.res4d = tf.nn.relu(self.res4d, name='res4d')
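# (Aside, not repo code) Blocks 2a through 4f above all follow the same
# ResNet-50 bottleneck pattern: 1x1 reduce, 3x3, 1x1 expand with no
# activation, add the shortcut, then ReLU. A hypothetical helper that would
# express one block per call, using the same tc.layers.conv2d API as this
# file; blocks with a projection shortcut would pass their branch1 tensor:
#
#     def bottleneck(x, scope, mid, out, stride=1, shortcut=None):
#         a = tc.layers.conv2d(x, kernel_size=1, num_outputs=mid, stride=stride, scope=scope + '_branch2a')
#         b = tc.layers.conv2d(a, kernel_size=3, num_outputs=mid, scope=scope + '_branch2b')
#         c = tc.layers.conv2d(b, kernel_size=1, num_outputs=out, activation_fn=None, scope=scope + '_branch2c')
#         s = x if shortcut is None else shortcut
#         return tf.nn.relu(tf.add(c, s, name=scope + '_add'), name=scope)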
 99 |
100 |         # Residual block 4e
101 |         self.res4e_branch2a = tc.layers.conv2d(self.res4d, kernel_size=1, num_outputs=256, scope='res4e_branch2a')
102 |         self.res4e_branch2b = tc.layers.conv2d(self.res4e_branch2a, kernel_size=3, num_outputs=256, scope='res4e_branch2b')
103 |         self.res4e_branch2c = tc.layers.conv2d(self.res4e_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4e_branch2c')
104 |         self.res4e = tf.add(self.res4e_branch2c, self.res4d, name='res4e_add')
105 |         self.res4e = tf.nn.relu(self.res4e, name='res4e')
106 |
107 |         # Residual block 4f
108 |         self.res4f_branch2a = tc.layers.conv2d(self.res4e, kernel_size=1, num_outputs=256, scope='res4f_branch2a')
109 |         self.res4f_branch2b = tc.layers.conv2d(self.res4f_branch2a, kernel_size=3, num_outputs=256, scope='res4f_branch2b')
110 |         self.res4f_branch2c = tc.layers.conv2d(self.res4f_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4f_branch2c')
111 |         self.res4f = tf.add(self.res4f_branch2c, self.res4e, name='res4f_add')
112 |         self.res4f = tf.nn.relu(self.res4f, name='res4f')
113 |
114 |         # Residual block 5a
115 |         self.res5a_branch2a_new = tc.layers.conv2d(self.res4f, kernel_size=1, num_outputs=512, scope='res5a_branch2a_new')
116 |         self.res5a_branch2b_new = tc.layers.conv2d(self.res5a_branch2a_new, kernel_size=3, num_outputs=512, scope='res5a_branch2b_new')
117 |         self.res5a_branch2c_new = tc.layers.conv2d(self.res5a_branch2b_new, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res5a_branch2c_new')
118 |         self.res5a_branch1_new = tc.layers.conv2d(self.res4f, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res5a_branch1_new')
119 |         self.res5a = tf.add(self.res5a_branch2c_new, self.res5a_branch1_new, name='res5a_add')
120 |         self.res5a = tf.nn.relu(self.res5a, name='res5a')
121 |
122 |         # Residual block 5b
123 |         self.res5b_branch2a_new = tc.layers.conv2d(self.res5a, kernel_size=1, num_outputs=256, scope='res5b_branch2a_new')
124 |         self.res5b_branch2b_new = tc.layers.conv2d(self.res5b_branch2a_new, kernel_size=3, num_outputs=128, scope='res5b_branch2b_new')
125 |         self.res5b_branch2c_new = tc.layers.conv2d(self.res5b_branch2b_new, kernel_size=1, num_outputs=256, scope='res5b_branch2c_new')
126 |
127 |         # Transpose Conv
128 |         self.res5c_branch1a = tf.layers.conv2d_transpose(self.res5b_branch2c_new, kernel_size=4, filters=63, activation=None, strides=2, padding='same', use_bias=False, name='res5c_branch1a')
129 |         self.res5c_branch2a = tf.layers.conv2d_transpose(self.res5b_branch2c_new, kernel_size=4, filters=128, activation=None, strides=2, padding='same', use_bias=False, name='res5c_branch2a')
130 |         self.bn5c_branch2a = tc.layers.batch_norm(self.res5c_branch2a, scale=True, is_training=self.is_training, scope='bn5c_branch2a')
131 |         self.bn5c_branch2a = tf.nn.relu(self.bn5c_branch2a)
132 |
133 |         self.res5c_delta_x, self.res5c_delta_y, self.res5c_delta_z = tf.split(self.res5c_branch1a, num_or_size_splits=3, axis=3)
134 |         self.res5c_branch1a_sqr = tf.multiply(self.res5c_branch1a, self.res5c_branch1a, name='res5c_branch1a_sqr')
135 |         self.res5c_delta_x_sqr, self.res5c_delta_y_sqr, self.res5c_delta_z_sqr = tf.split(self.res5c_branch1a_sqr, num_or_size_splits=3, axis=3)
136 |         self.res5c_bone_length_sqr = tf.add(tf.add(self.res5c_delta_x_sqr, self.res5c_delta_y_sqr), self.res5c_delta_z_sqr)
137 |         self.res5c_bone_length = tf.sqrt(self.res5c_bone_length_sqr)
138 |
139 |         self.res5c_branch2a_feat = tf.concat([self.bn5c_branch2a, self.res5c_delta_x, self.res5c_delta_y, self.res5c_delta_z, self.res5c_bone_length],
140 |                                              axis=3, name='res5c_branch2a_feat')
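# (Aside, not repo code) Channel accounting for the head above: the 63
# channels of res5c_branch1a are 3 x 21 joints, split into per-joint
# (delta_x, delta_y, delta_z) location maps, and res5c_bone_length is then
# sqrt(dx^2 + dy^2 + dz^2) per joint channel. The final 1x1 conv below
# emits 84 = 4 x 21 channels, split four ways into the confidence heatmap
# and the x/y/z location maps that the demo scripts fetch from the model.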
141 |
142 |         self.res5c_branch2b = tc.layers.conv2d(self.res5c_branch2a_feat, kernel_size=3, num_outputs=128, scope='res5c_branch2b')
143 |         self.res5c_branch2c = tf.layers.conv2d(self.res5c_branch2b, kernel_size=1, filters=84, activation=None, use_bias=False, name='res5c_branch2c')
144 |         self.heapmap, self.x_heatmap, self.y_heatmap, self.z_heatmap = tf.split(self.res5c_branch2c, num_or_size_splits=4, axis=3)
145 |
146 |
147 |     @property
148 |     def all_vars(self):
149 |         return tf.global_variables()
150 |
151 |
152 |     def load_weights(self, sess, weight_file):
153 |         # Read pretrained model file
154 |         model_weights = pickle.load(open(weight_file, 'rb'))
155 |
156 |         # For each layer each var
157 |         with tf.variable_scope('', reuse=True):
158 |             for variable in tf.global_variables():
159 |                 var_name = variable.name.split(':')[0]
160 |                 self._assign_weights_from_dict(var_name, model_weights, sess)
161 |
162 |
163 |     def _assign_weights_from_dict(self, var_name, model_weights, sess):
164 |         with tf.variable_scope('', reuse=True):
165 |             var_tf = tf.get_variable(var_name)
166 |             # print(var_tf)
167 |             sess.run(tf.assign(var_tf, model_weights[var_name]))
168 |             np.testing.assert_allclose(var_tf.eval(sess), model_weights[var_name])
169 |
170 |
171 |
172 |
173 | if __name__ == '__main__':
174 |     model_file = 'vnect.pkl'
175 |     model = VNect(368)
176 |
177 |
178 |     with tf.Session() as sess:
179 |         saver = tf.train.Saver()
180 |         tf_writer = tf.summary.FileWriter(logdir='./', graph=sess.graph)
181 |
182 |         sess.run(tf.global_variables_initializer())
183 |         print(model.res5b_branch2c_new)
184 |         print(model.heapmap, model.x_heatmap, model.y_heatmap, model.z_heatmap)
185 |
186 |
187 |
--------------------------------------------------------------------------------
/models/nets/vnect_model_bn_folded.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import tensorflow.contrib as tc
  3 |
  4 | import pickle
  5 | import numpy as np
  6 |
  7 |
  8 | class VNect():
  9 |     def __init__(self, input_size):
 10 |         self.is_training = False
 11 |         self.input_holder = tf.placeholder(dtype=tf.float32,
 12 |                                            shape=(None, input_size, input_size, 3))
 13 |         self._create_network()
 14 |
 15 |     def _create_network(self):
 16 |         # Conv
 17 |         self.conv1 = tc.layers.conv2d(self.input_holder, kernel_size=7, num_outputs=64, stride=2, scope='conv1')
 18 |         self.pool1 = tc.layers.max_pool2d(self.conv1, kernel_size=3, padding='same', scope='pool1')
 19 |
 20 |         # Residual block 2a
 21 |         self.res2a_branch2a = tc.layers.conv2d(self.pool1, kernel_size=1, num_outputs=64, scope='res2a_branch2a')
 22 |         self.res2a_branch2b = tc.layers.conv2d(self.res2a_branch2a, kernel_size=3, num_outputs=64, scope='res2a_branch2b')
 23 |         self.res2a_branch2c = tc.layers.conv2d(self.res2a_branch2b, kernel_size=1, num_outputs=256, activation_fn=None, scope='res2a_branch2c')
 24 |         self.res2a_branch1 = tc.layers.conv2d(self.pool1, kernel_size=1, num_outputs=256, activation_fn=None, scope='res2a_branch1')
 25 |         self.res2a = tf.add(self.res2a_branch2c, self.res2a_branch1, name='res2a_add')
 26 |         self.res2a = tf.nn.relu(self.res2a, name='res2a')
 27 |
 28 |         # Residual block 2b
 29 |         self.res2b_branch2a = tc.layers.conv2d(self.res2a, kernel_size=1, num_outputs=64, scope='res2b_branch2a')
 30 |         self.res2b_branch2b = tc.layers.conv2d(self.res2b_branch2a, kernel_size=3, num_outputs=64, scope='res2b_branch2b')
 31 |         self.res2b_branch2c = tc.layers.conv2d(self.res2b_branch2b, kernel_size=1, num_outputs=256, activation_fn=None, scope='res2b_branch2c')
 32 |         self.res2b = tf.add(self.res2b_branch2c, self.res2a, name='res2b_add')
 33 |         self.res2b = tf.nn.relu(self.res2b, name='res2b')
 34 |
 35 |         # Residual block 2c
 36 |         self.res2c_branch2a = tc.layers.conv2d(self.res2b, kernel_size=1, num_outputs=64, scope='res2c_branch2a')
 37 |         self.res2c_branch2b = tc.layers.conv2d(self.res2c_branch2a, kernel_size=3, num_outputs=64, scope='res2c_branch2b')
 38 |         self.res2c_branch2c = tc.layers.conv2d(self.res2c_branch2b, kernel_size=1, num_outputs=256, activation_fn=None, scope='res2c_branch2c')
 39 |         self.res2c = tf.add(self.res2c_branch2c, self.res2b, name='res2c_add')
 40 |         self.res2c = tf.nn.relu(self.res2c, name='res2c')
 41 |
 42 |         # Residual block 3a
 43 |         self.res3a_branch2a = tc.layers.conv2d(self.res2c, kernel_size=1, num_outputs=128, stride=2, scope='res3a_branch2a')
 44 |         self.res3a_branch2b = tc.layers.conv2d(self.res3a_branch2a, kernel_size=3, num_outputs=128, scope='res3a_branch2b')
 45 |         self.res3a_branch2c = tc.layers.conv2d(self.res3a_branch2b, kernel_size=1, num_outputs=512, activation_fn=None, scope='res3a_branch2c')
 46 |         self.res3a_branch1 = tc.layers.conv2d(self.res2c, kernel_size=1, num_outputs=512, activation_fn=None, stride=2, scope='res3a_branch1')
 47 |         self.res3a = tf.add(self.res3a_branch2c, self.res3a_branch1, name='res3a_add')
 48 |         self.res3a = tf.nn.relu(self.res3a, name='res3a')
 49 |
 50 |         # Residual block 3b
 51 |         self.res3b_branch2a = tc.layers.conv2d(self.res3a, kernel_size=1, num_outputs=128, scope='res3b_branch2a')
 52 |         self.res3b_branch2b = tc.layers.conv2d(self.res3b_branch2a, kernel_size=3, num_outputs=128, scope='res3b_branch2b')
 53 |         self.res3b_branch2c = tc.layers.conv2d(self.res3b_branch2b, kernel_size=1, num_outputs=512, activation_fn=None, scope='res3b_branch2c')
 54 |         self.res3b = tf.add(self.res3b_branch2c, self.res3a, name='res3b_add')
 55 |         self.res3b = tf.nn.relu(self.res3b, name='res3b')
 56 |
 57 |         # Residual block 3c
 58 |         self.res3c_branch2a = tc.layers.conv2d(self.res3b, kernel_size=1, num_outputs=128, scope='res3c_branch2a')
 59 |         self.res3c_branch2b = tc.layers.conv2d(self.res3c_branch2a, kernel_size=3, num_outputs=128, scope='res3c_branch2b')
 60 |         self.res3c_branch2c = tc.layers.conv2d(self.res3c_branch2b, kernel_size=1, num_outputs=512, activation_fn=None, scope='res3c_branch2c')
 61 |         self.res3c = tf.add(self.res3c_branch2c, self.res3b, name='res3c_add')
 62 |         self.res3c = tf.nn.relu(self.res3c, name='res3c')
 63 |
 64 |         # Residual block 3d
 65 |         self.res3d_branch2a = tc.layers.conv2d(self.res3c, kernel_size=1, num_outputs=128, scope='res3d_branch2a')
 66 |         self.res3d_branch2b = tc.layers.conv2d(self.res3d_branch2a, kernel_size=3, num_outputs=128, scope='res3d_branch2b')
 67 |         self.res3d_branch2c = tc.layers.conv2d(self.res3d_branch2b, kernel_size=1, num_outputs=512, activation_fn=None, scope='res3d_branch2c')
 68 |         self.res3d = tf.add(self.res3d_branch2c, self.res3c, name='res3d_add')
 69 |         self.res3d = tf.nn.relu(self.res3d, name='res3d')
 70 |
 71 |         # Residual block 4a
 72 |         self.res4a_branch2a = tc.layers.conv2d(self.res3d, kernel_size=1, num_outputs=256, stride=2, scope='res4a_branch2a')
 73 |         self.res4a_branch2b = tc.layers.conv2d(self.res4a_branch2a, kernel_size=3, num_outputs=256, scope='res4a_branch2b')
 74 |         self.res4a_branch2c = tc.layers.conv2d(self.res4a_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4a_branch2c')
 75 |         self.res4a_branch1 = tc.layers.conv2d(self.res3d, kernel_size=1, num_outputs=1024, activation_fn=None, stride=2, scope='res4a_branch1')
 76 |         self.res4a = tf.add(self.res4a_branch2c, self.res4a_branch1, name='res4a_add')
 77 |         self.res4a = tf.nn.relu(self.res4a, name='res4a')
 78 |
 79 |         # Residual block 4b
 80 |         self.res4b_branch2a = tc.layers.conv2d(self.res4a, kernel_size=1, num_outputs=256, scope='res4b_branch2a')
 81 |         self.res4b_branch2b = tc.layers.conv2d(self.res4b_branch2a, kernel_size=3, num_outputs=256, scope='res4b_branch2b')
 82 |         self.res4b_branch2c = tc.layers.conv2d(self.res4b_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4b_branch2c')
 83 |         self.res4b = tf.add(self.res4b_branch2c, self.res4a, name='res4b_add')
 84 |         self.res4b = tf.nn.relu(self.res4b, name='res4b')
 85 |
 86 |         # Residual block 4c
 87 |         self.res4c_branch2a = tc.layers.conv2d(self.res4b, kernel_size=1, num_outputs=256, scope='res4c_branch2a')
 88 |         self.res4c_branch2b = tc.layers.conv2d(self.res4c_branch2a, kernel_size=3, num_outputs=256, scope='res4c_branch2b')
 89 |         self.res4c_branch2c = tc.layers.conv2d(self.res4c_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4c_branch2c')
 90 |         self.res4c = tf.add(self.res4c_branch2c, self.res4b, name='res4c_add')
 91 |         self.res4c = tf.nn.relu(self.res4c, name='res4c')
 92 |
 93 |         # Residual block 4d
 94 |         self.res4d_branch2a = tc.layers.conv2d(self.res4c, kernel_size=1, num_outputs=256, scope='res4d_branch2a')
 95 |         self.res4d_branch2b = tc.layers.conv2d(self.res4d_branch2a, kernel_size=3, num_outputs=256, scope='res4d_branch2b')
 96 |         self.res4d_branch2c = tc.layers.conv2d(self.res4d_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4d_branch2c')
 97 |         self.res4d = tf.add(self.res4d_branch2c, self.res4c, name='res4d_add')
 98 |         self.res4d = tf.nn.relu(self.res4d, name='res4d')
 99 |
100 |         # Residual block 4e
101 |         self.res4e_branch2a = tc.layers.conv2d(self.res4d, kernel_size=1, num_outputs=256, scope='res4e_branch2a')
102 |         self.res4e_branch2b = tc.layers.conv2d(self.res4e_branch2a, kernel_size=3, num_outputs=256, scope='res4e_branch2b')
103 |         self.res4e_branch2c = tc.layers.conv2d(self.res4e_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4e_branch2c')
104 |         self.res4e = tf.add(self.res4e_branch2c, self.res4d, name='res4e_add')
105 |         self.res4e = tf.nn.relu(self.res4e, name='res4e')
106 |
107 |         # Residual block 4f
108 |         self.res4f_branch2a = tc.layers.conv2d(self.res4e, kernel_size=1, num_outputs=256, scope='res4f_branch2a')
109 |         self.res4f_branch2b = tc.layers.conv2d(self.res4f_branch2a, kernel_size=3, num_outputs=256, scope='res4f_branch2b')
110 |         self.res4f_branch2c = tc.layers.conv2d(self.res4f_branch2b, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res4f_branch2c')
111 |         self.res4f = tf.add(self.res4f_branch2c, self.res4e, name='res4f_add')
112 |         self.res4f = tf.nn.relu(self.res4f, name='res4f')
113 |
114 |         # Residual block 5a
115 |         self.res5a_branch2a_new = tc.layers.conv2d(self.res4f, kernel_size=1, num_outputs=512, scope='res5a_branch2a_new')
116 |         self.res5a_branch2b_new = tc.layers.conv2d(self.res5a_branch2a_new, kernel_size=3, num_outputs=512, scope='res5a_branch2b_new')
117 |         self.res5a_branch2c_new = tc.layers.conv2d(self.res5a_branch2b_new, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res5a_branch2c_new')
118 |         self.res5a_branch1_new = tc.layers.conv2d(self.res4f, kernel_size=1, num_outputs=1024, activation_fn=None, scope='res5a_branch1_new')
119 |         self.res5a = tf.add(self.res5a_branch2c_new, self.res5a_branch1_new, name='res5a_add')
120 |         self.res5a = tf.nn.relu(self.res5a, name='res5a')
121 |
122 |         # Residual block 5b
123 |         self.res5b_branch2a_new = tc.layers.conv2d(self.res5a, kernel_size=1, num_outputs=256, scope='res5b_branch2a_new')
124 |         self.res5b_branch2b_new = tc.layers.conv2d(self.res5b_branch2a_new, kernel_size=3, num_outputs=128, scope='res5b_branch2b_new')
125 |         self.res5b_branch2c_new = tc.layers.conv2d(self.res5b_branch2b_new, kernel_size=1, num_outputs=256, scope='res5b_branch2c_new')
126 |
127 |         # Transpose Conv
128 |         self.res5c_branch1a = tf.layers.conv2d_transpose(self.res5b_branch2c_new, kernel_size=4, filters=63, activation=None, strides=2, padding='same', use_bias=False, name='res5c_branch1a')
129 |         self.res5c_branch2a = tf.layers.conv2d_transpose(self.res5b_branch2c_new, kernel_size=4, filters=128, activation=None, strides=2, padding='same', use_bias=False, name='res5c_branch2a')
130 |         self.bn5c_branch2a = tc.layers.batch_norm(self.res5c_branch2a, scale=True, is_training=self.is_training, scope='bn5c_branch2a')
131 |         self.bn5c_branch2a = tf.nn.relu(self.bn5c_branch2a)
132 |
133 |         self.res5c_delta_x, self.res5c_delta_y, self.res5c_delta_z = tf.split(self.res5c_branch1a, num_or_size_splits=3, axis=3)
134 |         self.res5c_branch1a_sqr = tf.multiply(self.res5c_branch1a, self.res5c_branch1a, name='res5c_branch1a_sqr')
135 |         self.res5c_delta_x_sqr, self.res5c_delta_y_sqr, self.res5c_delta_z_sqr = tf.split(self.res5c_branch1a_sqr, num_or_size_splits=3, axis=3)
136 |         self.res5c_bone_length_sqr = tf.add(tf.add(self.res5c_delta_x_sqr, self.res5c_delta_y_sqr), self.res5c_delta_z_sqr)
137 |         self.res5c_bone_length = tf.sqrt(self.res5c_bone_length_sqr)
138 |
139 |         self.res5c_branch2a_feat = tf.concat([self.bn5c_branch2a, self.res5c_delta_x, self.res5c_delta_y, self.res5c_delta_z, self.res5c_bone_length],
140 |                                              axis=3, name='res5c_branch2a_feat')
141 |
142 |         self.res5c_branch2b = tc.layers.conv2d(self.res5c_branch2a_feat, kernel_size=3, num_outputs=128, scope='res5c_branch2b')
143 |         self.res5c_branch2c = tf.layers.conv2d(self.res5c_branch2b, kernel_size=1, filters=84, activation=None, use_bias=False, name='res5c_branch2c')
144 |         self.heapmap, self.x_heatmap, self.y_heatmap, self.z_heatmap = tf.split(self.res5c_branch2c, num_or_size_splits=4, axis=3)
145 |
146 |
147 |     @property
148 |     def all_vars(self):
149 |         return tf.global_variables()
150 |
151 |
152 |     def load_weights(self, sess, weight_file):
153 |         # Read pretrained model file
154 |         model_weights = pickle.load(open(weight_file, 'rb'))
155 |
156 |         # For each layer each var
157 |         with tf.variable_scope('', reuse=True):
158 |             for variable in tf.global_variables():
159 |                 var_name = variable.name.split(':')[0]
160 |                 self._assign_weights_from_dict(var_name, model_weights, sess)
161 |
162 |
163 |     def _assign_weights_from_dict(self, var_name, model_weights, sess):
164 |         with tf.variable_scope('', reuse=True):
165 |             var_tf = tf.get_variable(var_name)
166 |             # print(var_tf)
167 |             sess.run(tf.assign(var_tf, model_weights[var_name]))
168 |             np.testing.assert_allclose(var_tf.eval(sess), model_weights[var_name])
169 |
170 |
171 |
172 |
173 | if __name__ == '__main__':
174 |     model_file = 'vnect.pkl'
175 |     model = VNect(368)
176 |
177 |
178 |     with tf.Session() as sess:
179 |         saver = tf.train.Saver()
180 |         tf_writer = tf.summary.FileWriter(logdir='./', graph=sess.graph)
181 |
182 |         sess.run(tf.global_variables_initializer())
183 |         print(model.res5b_branch2c_new)
184 |         print(model.heapmap, model.x_heatmap, model.y_heatmap, model.z_heatmap)
185 |
186 |
187 |
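Note: the class above is line-for-line identical to models/vnect_model.py. The `_bn_folded` name suggests it is meant to load a checkpoint in which each Caffe batch-norm/scale pair has already been folded into the neighbouring convolution's weights. For reference, a minimal numpy sketch of the standard folding identity (a sketch under an assumed HWIO kernel layout, not code from this repo; `eps` is the batch-norm epsilon):

    import numpy as np

    def fold_batch_norm(w, b, gamma, beta, mean, var, eps=1e-5):
        # w: (kh, kw, in_c, out_c) conv kernel, b: (out_c,) conv bias
        scale = gamma / np.sqrt(var + eps)  # one scale per output channel
        return w * scale, (b - mean) * scale + beta

Note also that the attribute `heapmap` (sic) is spelled that way here, in models/vnect_model.py, and in the demo scripts, so it has to be renamed everywhere at once or not at all.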
--------------------------------------------------------------------------------
/demo_multithread.py:
--------------------------------------------------------------------------------
  1 | import caffe
  2 | import argparse
  3 | import os
  4 | import cv2
  5 | import numpy as np
  6 | import time
  7 | import matplotlib.pyplot as plt
  8 | import threading
  9 | import Queue  # Python 2 stdlib module; renamed to 'queue' in Python 3
 10 | from mpl_toolkits.mplot3d import Axes3D
 11 |
 12 | import utils
 13 |
 14 | parser = argparse.ArgumentParser()
 15 | parser.add_argument('--device', default='gpu')
 16 | parser.add_argument('--model_dir', default='/media/tim_ho/HDD1/Projects/VNect-tensorflow/models')
 17 | parser.add_argument('--input_size', default=368)
 18 | parser.add_argument('--num_of_joints', default=21)
 19 | parser.add_argument('--pool_scale', default=8)
 20 | parser.add_argument('--plot_2d', default=False)
 21 | parser.add_argument('--plot_3d', default=True)
 22 | args = parser.parse_args()
 23 |
 24 | joint_color_code = [[139, 53, 255],
 25 |                     [0, 56, 255],
 26 |                     [43, 140, 237],
 27 |                     [37, 168, 36],
 28 |                     [147, 147, 0],
 29 |                     [70, 17, 145]]
 30 |
 31 | # Limb parents of each joint
 32 | limb_parents = [1, 15, 1, 2, 3, 1, 5, 6, 14, 8, 9, 14, 11, 12, 14, 14, 1, 4, 7, 10, 13]
 33 |
 34 | # Input scales
 35 | scales = [1.0, 0.7]
 36 |
 37 |
 38 | # Global vars for threads
 39 | # joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32)
 40 | # joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32)
 41 | # cam_img = np.zeros(shape=(args.input_size, args.input_size, 3), dtype=np.uint8)
 42 | # hm_size = args.input_size // args.pool_scale
 43 | # hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
 44 | # x_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
 45 | # y_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
 46 | # z_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
 47 |
 48 | # Create queue between threads
 49 | cam_model_q = Queue.Queue(1)
 50 | model_post_q = Queue.Queue(1)
 51 | post_render_q = Queue.Queue(1)
 52 |
 53 |
 54 |
 55 | def camera_reader():
 56 |     cam = cv2.VideoCapture(0)
 57 |     while True:
 58 |         t1 = time.time()
 59 |         cam_img = utils.read_square_image('', cam, args.input_size, 'WEBCAM')
 60 |         if not cam_model_q.full():
 61 |             cam_model_q.put(cam_img)
 62 |             # print('cam put')
 63 |         print('Cam FPS', 1/(time.time()-t1))
 64 |
 65 |
 66 |
 67 | def forward():
 68 |     # global hm_avg, x_hm_avg, y_hm_avg, z_hm_avg
 69 |     cam_img = np.zeros(shape=(args.input_size, args.input_size, 3), dtype=np.uint8)
 70 |     joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32)
 71 |     joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32)
 72 |
 73 |     if args.device == 'cpu':
 74 |         caffe.set_mode_cpu()
 75 |     elif args.device == 'gpu':
 76 |         caffe.set_mode_gpu()
 77 |         caffe.set_device(1)
 78 |     else:
 79 |         raise ValueError('No such device')
 80 |
 81 |     model_prototxt_path = os.path.join(args.model_dir, 'vnect_net.prototxt')
 82 |     model_weight_path = os.path.join(args.model_dir, 'vnect_model.caffemodel')
 83 |
 84 |     # Load model
 85 |     model = caffe.Net(model_prototxt_path,
 86 |                       model_weight_path,
 87 |                       caffe.TEST)
 88 |
 89 |     # Show network structure and shape
 90 |     print('##################################################')
 91 |     print('################Network Structures################')
 92 |     print('##################################################')
 93 |     for layer_name in model.params.keys():
 94 |         print(layer_name, model.params[layer_name][0].data.shape)
 95 |     print('')
 96 |     print('##################################################')
 97 |     print('##################################################')
 98 |     print('##################################################')
 99 |     print('\n\n\n\n')
100 |
101 |     print('##################################################')
102 |     print('################Input Output Blobs################')
103 |     print('##################################################')
104 |     for i in model.blobs.keys():
105 |         print(i, model.blobs[i].data.shape)
106 |     print('##################################################')
107 |     print('##################################################')
108 |     print('##################################################')
109 |
110 |     # cam = cv2.VideoCapture(0)
111 |     is_tracking = False
112 |     # for img_name in os.listdir('test_imgs'):
113 |     while True:
114 |         # if not is_tracking:
115 |
116 |         img_path = 'test_imgs/{}'.format('dance.jpg')
117 |         t1 = time.time()
118 |         input_batch = []
119 |
120 |         if not cam_model_q.empty():
121 |             cam_img = cam_model_q.get()
122 |             # print('forward get')
123 |         # cam_img = utils.read_square_image('', cam, args.input_size, 'WEBCAM')
124 |         # cam_img = utils.read_square_image(img_path, '', args.input_size, 'IMAGE')
125 |         # cv2.imshow('', cam_img)
126 |         # cv2.waitKey(0)
127 |         orig_size_input = cam_img.astype(np.float32)
128 |
129 |         for scale in scales:
130 |             resized_img = utils.resize_pad_img(orig_size_input, scale, args.input_size)
131 |             input_batch.append(resized_img)
132 |
133 |         input_batch = np.asarray(input_batch, dtype=np.float32)
134 |         input_batch = np.transpose(input_batch, (0, 3, 1, 2))
135 |         input_batch /= 255.0
136 |         input_batch -= 0.4
137 |
138 |         model.blobs['data'].data[...] = input_batch
139 |
140 |         # Forward
141 |         model.forward()
142 |
143 |         # Get output data
144 |         x_hm = model.blobs['x_heatmap'].data
145 |         y_hm = model.blobs['y_heatmap'].data
146 |         z_hm = model.blobs['z_heatmap'].data
147 |         hm = model.blobs['heatmap'].data
148 |
149 |         # Trans coordinates
150 |         x_hm = x_hm.transpose([0, 2, 3, 1])
151 |         y_hm = y_hm.transpose([0, 2, 3, 1])
152 |         z_hm = z_hm.transpose([0, 2, 3, 1])
153 |         hm = hm.transpose([0, 2, 3, 1])
154 |
155 |         # Average scale outputs
156 |         hm_size = args.input_size // args.pool_scale
157 |         hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
158 |         x_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
159 |         y_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
160 |         z_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
161 |         for i in range(len(scales)):
162 |             rescale = 1.0 / scales[i]
163 |             scaled_hm = cv2.resize(hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR)
164 |             scaled_x_hm = cv2.resize(x_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR)
165 |             scaled_y_hm = cv2.resize(y_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR)
166 |             scaled_z_hm = cv2.resize(z_hm[i, :, :, :], (0, 0), fx=rescale, fy=rescale, interpolation=cv2.INTER_LINEAR)
167 |             mid = [scaled_hm.shape[0] // 2, scaled_hm.shape[1] // 2]
168 |             hm_avg += scaled_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2,
169 |                                 mid[1] - hm_size // 2: mid[1] + hm_size // 2, :]
170 |             x_hm_avg += scaled_x_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2,
171 |                                     mid[1] - hm_size // 2: mid[1] + hm_size // 2, :]
172 |             y_hm_avg += scaled_y_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2,
173 |                                     mid[1] - hm_size // 2: mid[1] + hm_size // 2, :]
174 |             z_hm_avg += scaled_z_hm[mid[0] - hm_size // 2: mid[0] + hm_size // 2,
175 |                                     mid[1] - hm_size // 2: mid[1] + hm_size // 2, :]
176 |         hm_avg /= len(scales)
177 |         x_hm_avg /= len(scales)
178 |         y_hm_avg /= len(scales)
179 |         z_hm_avg /= len(scales)
180 |
181 |         t2 = time.time()
182 |         # Get 2d joints
183 |         joints_2d = utils.extract_2d_joint_from_heatmap(hm_avg, args.input_size, joints_2d)
184 |
185 |         # Get 3d joints
186 |         joints_3d = utils.extract_3d_joints_from_heatmap(joints_2d, x_hm_avg, y_hm_avg, z_hm_avg, args.input_size,
187 |                                                          joints_3d)
188 |         print('Post FPS', 1/(time.time()-t2))
189 |
190 |         if not model_post_q.full():
191 |             # model_post_q.put([hm_avg, x_hm_avg, y_hm_avg, z_hm_avg, cam_img])
192 |             model_post_q.put([joints_2d, joints_3d, cam_img])
193 |             # print('forward put')
194 |         print('Forward FPS', 1 / (time.time() - t1))
195 |
196 |
197 |
198 |
199 |         # Get 2d joints
200 |         # joints_2d = utils.extract_2d_joint_from_heatmap(hm_avg, args.input_size, joints_2d)
201 |
202 |         # Get 3d joints
203 |         # joints_3d = utils.extract_3d_joints_from_heatmap(joints_2d, x_hm_avg, y_hm_avg, z_hm_avg, args.input_size,
204 |         #                                                  joints_3d)
205 |
206 |
207 |         # plt.show(block=False)
208 |
209 |
210 |
211 |
212 | def post_process():
213 |     # global joints_2d, joints_3d
214 |     joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32)
215 |     joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32)
216 |     hm_size = args.input_size // args.pool_scale
217 |     hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
218 |     x_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
219 |     y_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
220 |     z_hm_avg = np.zeros(shape=(hm_size, hm_size, args.num_of_joints))
221 |     cam_img = np.zeros(shape=(args.input_size, args.input_size, 3), dtype=np.uint8)
222 |
223 |
224 |     while True:
225 |         if not model_post_q.empty():
226 |             [hm_avg, x_hm_avg, y_hm_avg, z_hm_avg, cam_img] = model_post_q.get(False)
227 |             # print('post get')
228 |
229 |
230 |         t1 = time.time()
231 |         # Get 2d joints
232 |         joints_2d = utils.extract_2d_joint_from_heatmap(hm_avg, args.input_size, joints_2d)
233 |
234 |         # Get 3d joints
235 |         if args.plot_3d:
236 |             joints_3d = utils.extract_3d_joints_from_heatmap(joints_2d, x_hm_avg, y_hm_avg, z_hm_avg, args.input_size,
237 |                                                              joints_3d)
238 |         print('Post FPS', 1/(time.time()-t1))
239 |
240 |         if not post_render_q.full():
241 |             post_render_q.put([joints_2d, joints_3d, cam_img])
242 |             # print('post put')
243 |
244 |
245 |
246 |
247 | def render_plt():
248 |     joints_2d = np.zeros(shape=(args.num_of_joints, 2), dtype=np.int32)
249 |     joints_3d = np.zeros(shape=(args.num_of_joints, 3), dtype=np.float32)
250 |     cam_img = np.zeros(shape=(args.input_size, args.input_size, 3), dtype=np.uint8)
251 |
252 |     if args.plot_3d and args.plot_2d:
253 |         plt.ion()
254 |         fig = plt.figure(figsize=(10,10))
255 |         ax = fig.add_subplot(121, projection='3d')
256 |         ax2 = fig.add_subplot(122)
257 |         plt.show()
258 |     elif args.plot_3d:
259 |         plt.ion()
260 |         fig = plt.figure(figsize=(10,10))
261 |         ax = fig.add_subplot(111, projection='3d')
262 |
263 |
264 |     while True:
265 |
266 |         if model_post_q.qsize() != 0:
267 |             [joints_2d, joints_3d, cam_img] = model_post_q.get(False)
268 |         else:
269 |             print('render old')
270 |
271 |         t1 = time.time()
272 |         # Plot 2d location heatmap
273 |         if args.plot_2d:
274 |             joint_map = np.zeros(shape=(args.input_size, args.input_size, 3))
275 |             for joint_num in range(joints_2d.shape[0]):
276 |                 cv2.circle(joint_map, center=(joints_2d[joint_num][1], joints_2d[joint_num][0]), radius=3,
277 |                            color=(255, 0, 0), thickness=-1)
278 |
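# NOTE: most matplotlib GUI backends are not thread-safe, so the plt/ax
# calls below can misbehave when render_plt runs on a worker thread, as it
# does in __main__ at the bottom of this file. A common workaround (not
# applied here) is to start only the camera and model threads and call
# render_plt() from the main thread. Note also that the final else branch
# below assumes plot_2d was enabled, since it reads limb_img and joint_map.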
279 |             # Plot 2d limbs
280 |             limb_img = utils.draw_limbs_2d(cam_img, joints_2d, limb_parents)
281 |
282 |         # Plot 3d limbs
283 |         if args.plot_3d:
284 |             ax.clear()
285 |             ax.view_init(azim=0, elev=90)
286 |             ax.set_xlim(-700, 700)
287 |             ax.set_ylim(-800, 800)
288 |             ax.set_zlim(-700, 700)
289 |             ax.set_xlabel('x')
290 |             ax.set_ylabel('y')
291 |             ax.set_zlabel('z')
292 |             utils.draw_limbs_3d(joints_3d, limb_parents, ax)
293 |
294 |         # draw heatmap
295 |         # hm_img = utils.draw_predicted_heatmap(hm_avg*200, args.input_size)
296 |         # cv2.imshow('hm', hm_img.astype(np.uint8))
297 |         # cv2.waitKey(0)
298 |
299 |         if args.plot_2d and args.plot_3d:
300 |             concat_img = np.concatenate((limb_img, joint_map), axis=1)
301 |             ax2.imshow(concat_img[..., ::-1].astype(np.uint8))
302 |             plt.pause(1e-10)
303 |         elif args.plot_3d:
304 |             plt.pause(1e-10)
305 |         else:
306 |             concat_img = np.concatenate((limb_img, joint_map), axis=1)
307 |             cv2.imshow('2d', concat_img.astype(np.uint8))
308 |             cv2.waitKey(1)
309 |             # ax2.imshow(concat_img.astype(np.uint8))
310 |         print('Render FPS', 1 / (time.time() - t1))
311 |
312 |
313 |
314 |
315 |
316 | if __name__ == '__main__':
317 |     t1 = threading.Thread(target=camera_reader, name='cam_thread')
318 |     t2 = threading.Thread(target=forward, name='model_thread')
319 |     # t3 = threading.Thread(target=post_process, name='post_process_thread')
320 |     t4 = threading.Thread(target=render_plt, name='render_thread')
321 |
322 |     t1.start()
323 |     t2.start()
324 |     # t3.start()
325 |     t4.start()
326 |
--------------------------------------------------------------------------------
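The multi-scale averaging loop above appears nearly verbatim in demo_tf.py, demo_tf_gl.py and demo_multithread.py. A minimal sketch of a shared helper that could factor it out, assuming heatmaps shaped (num_scales, H, W, num_joints) as the demos produce (a sketch, not part of the repository):

    import cv2
    import numpy as np

    def average_scaled_heatmaps(hms, scales, hm_size):
        # Upscale each per-scale output back to a common resolution,
        # center-crop to (hm_size, hm_size), then average over scales.
        avg = np.zeros((hm_size, hm_size, hms.shape[-1]), dtype=np.float32)
        for i, scale in enumerate(scales):
            scaled = cv2.resize(hms[i], (0, 0), fx=1.0 / scale, fy=1.0 / scale,
                                interpolation=cv2.INTER_LINEAR)
            mid_h, mid_w = scaled.shape[0] // 2, scaled.shape[1] // 2
            avg += scaled[mid_h - hm_size // 2: mid_h + hm_size // 2,
                          mid_w - hm_size // 2: mid_w + hm_size // 2, :]
        return avg / len(scales)

Each demo could then replace the four unrolled accumulations with one call per map, e.g. hm_avg = average_scaled_heatmaps(hm, scales, hm_size).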