├── 解压data.zip到这里.txt ├── 解压utils.zip到这里.txt ├── components ├── FCRN_DepthPrediction_vmd │ ├── tensorflow │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── fcrn.cpython-36.pyc │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ └── network.cpython-36.pyc │ │ │ ├── fcrn.py │ │ │ └── network.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── api.cpython-36.pyc │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── predict_video.cpython-36.pyc │ │ ├── predict.py │ │ ├── predict_video.py │ │ └── api.py │ ├── matlab │ │ ├── setupMatConvNet.m │ │ ├── error_metrics.m │ │ ├── +dagnn │ │ │ └── Combine.m │ │ ├── DepthMapPrediction.m │ │ ├── evaluateNYU.m │ │ └── evaluateMake3D.m │ └── VideoToDepth.bat ├── Three_D_PoseBaseline_multi │ ├── packages │ │ └── lifting │ │ │ ├── __init__.py │ │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── draw.py │ │ │ ├── prob_model.py │ │ │ ├── upright_fast.py │ │ │ ├── process.py │ │ │ └── cpm.py │ │ │ └── _pose_estimator.py │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ └── applications │ │ ├── __pycache__ │ │ ├── api.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ ├── VmdWriter.cpython-36.pyc │ │ └── pos2vmd_multi.cpython-36.pyc │ │ ├── __init__.py │ │ ├── demo.py │ │ ├── VmdWriter.py │ │ ├── vmdlifting.py │ │ ├── vmdlifting_multi.py │ │ ├── head_face.py │ │ └── pos2vmd.py └── Three_D_PoseBaseline_vmd │ ├── __init__.py │ ├── __pycache__ │ ├── api.cpython-36.pyc │ ├── viz.cpython-36.pyc │ ├── __init__.cpython-36.pyc │ ├── cameras.cpython-36.pyc │ ├── data_utils.cpython-36.pyc │ ├── procrustes.cpython-36.pyc │ ├── linear_model.cpython-36.pyc │ └── predict_3dpose.cpython-36.pyc │ ├── procrustes.py │ ├── viz.py │ ├── cameras.py │ ├── openpose_3dpose_sandbox_realtime.py │ ├── openpose_3dpose_sandbox.py │ └── linear_model.py ├── requirements.txt ├── README.md ├── launch.py └── LICENSE /解压data.zip到这里.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /解压utils.zip到这里.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcrn import ResNet50UpProj 2 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/packages/lifting/__init__.py: -------------------------------------------------------------------------------- 1 | from ._pose_estimator import * 2 | from . 
import utils 3 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | com_path = os.path.join(os.getcwd(), "components", "Three_D_PoseBaseline_multi") -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | com_path = os.path.join(os.getcwd(), "components", "Three_D_PoseBaseline_vmd") -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | com_path = os.path.join(os.getcwd(), "components", "FCRN_DepthPrediction_vmd", "tensorflow") -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/__pycache__/api.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_vmd/__pycache__/api.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/__pycache__/viz.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_vmd/__pycache__/viz.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_vmd/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/__pycache__/cameras.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_vmd/__pycache__/cameras.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_multi/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/__pycache__/data_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_vmd/__pycache__/data_utils.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/__pycache__/procrustes.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_vmd/__pycache__/procrustes.cpython-36.pyc 
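Note on the component packages above: each component `__init__.py` builds `com_path` with `os.path.join(os.getcwd(), "components", ...)`, so the path only resolves correctly when the interpreter's working directory is the project root at import time (presumably the case when the pipeline is started with `python launch.py` from the root, as the README describes). A minimal, hypothetical sketch of a working-directory-independent variant — not the project's actual code — would anchor the path to the module file instead of to `os.getcwd()`:

    import os

    # Hypothetical alternative to the os.getcwd()-based lookup used in the
    # component __init__.py files: resolve the component directory from this
    # file's own location, so the path stays valid even if the process is
    # started from a different working directory.
    com_path = os.path.dirname(os.path.abspath(__file__))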
-------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/__pycache__/linear_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_vmd/__pycache__/linear_model.cpython-36.pyc -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/__pycache__/api.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/FCRN_DepthPrediction_vmd/tensorflow/__pycache__/api.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/__pycache__/predict_3dpose.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_vmd/__pycache__/predict_3dpose.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/__pycache__/api.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_multi/applications/__pycache__/api.cpython-36.pyc -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/FCRN_DepthPrediction_vmd/tensorflow/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/models/__pycache__/fcrn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/FCRN_DepthPrediction_vmd/tensorflow/models/__pycache__/fcrn.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_multi/applications/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/__pycache__/predict_video.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/FCRN_DepthPrediction_vmd/tensorflow/__pycache__/predict_video.cpython-36.pyc -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/FCRN_DepthPrediction_vmd/tensorflow/models/__pycache__/__init__.cpython-36.pyc 
-------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/models/__pycache__/network.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/FCRN_DepthPrediction_vmd/tensorflow/models/__pycache__/network.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/__pycache__/VmdWriter.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_multi/applications/__pycache__/VmdWriter.cpython-36.pyc -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/__pycache__/pos2vmd_multi.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huahuahuage/OpenMMD/HEAD/components/Three_D_PoseBaseline_multi/applications/__pycache__/pos2vmd_multi.cpython-36.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dlib==19.22.1 2 | h5py==2.10.0 3 | imageio==2.9.0 4 | matplotlib==3.0.0 5 | numpy==1.16.4 6 | opencv_python==4.5.3.56 7 | Pillow==9.0.0 8 | PyQt5==5.15.6 9 | scipy==1.7.3 10 | six==1.16.0 11 | scikit-image  # replaces the 'skimage==0.0' placeholder; provides the 'skimage' module imported by head_face.py 12 | tqdm==4.62.3 -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/packages/lifting/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mar 23 13:57 2017 4 | 5 | @author: Denis Tome' 6 | """ 7 | from .prob_model import * 8 | from .draw import * 9 | from .cpm import * 10 | from .process import * 11 | from . import config 12 | from . 
import upright_fast 13 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import dirname, realpath 3 | 4 | 5 | dir_path = dirname(realpath(__file__)) 6 | project_path = realpath(dir_path + '/..') 7 | 8 | libs_dir_path = project_path + '/packages' 9 | 10 | # Adding where to find libraries and dependencies 11 | sys.path.append(libs_dir_path) 12 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/packages/lifting/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mar 23 11:57 2017 4 | 5 | @author: Denis Tome' 6 | """ 7 | 8 | __all__ = [ 9 | 'VISIBLE_PART', 10 | 'MIN_NUM_JOINTS', 11 | 'CENTER_TR', 12 | 'SIGMA', 13 | 'STRIDE', 14 | 'SIGMA_CENTER', 15 | 'INPUT_SIZE', 16 | 'OUTPUT_SIZE', 17 | 'NUM_JOINTS', 18 | 'NUM_OUTPUT', 19 | 'H36M_NUM_JOINTS', 20 | 'JOINT_DRAW_SIZE', 21 | 'LIMB_DRAW_SIZE' 22 | ] 23 | 24 | # threshold 25 | VISIBLE_PART = 1e-3 26 | MIN_NUM_JOINTS = 5 27 | CENTER_TR = 0.4 28 | 29 | # net attributes 30 | SIGMA = 7 31 | STRIDE = 8 32 | SIGMA_CENTER = 21 33 | INPUT_SIZE = 368 34 | OUTPUT_SIZE = 46 35 | NUM_JOINTS = 14 36 | NUM_OUTPUT = NUM_JOINTS + 1 37 | H36M_NUM_JOINTS = 17 38 | 39 | # draw options 40 | JOINT_DRAW_SIZE = 3 41 | LIMB_DRAW_SIZE = 2 42 | NORMALISATION_COEFFICIENT = 1280*720 43 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/matlab/setupMatConvNet.m: -------------------------------------------------------------------------------- 1 | function setupMatConvNet(matconvnet_path) 2 | 3 | % check path 4 | if ~exist(fullfile(matconvnet_path, 'matlab', 'vl_setupnn.m'), 'file') 5 | error('Count not find MatConvNet in "%s"!\nPlease point matcovnet_path to the correct directory.\n', matconvnet_path); 6 | end 7 | 8 | % check if it is the right version (beta-20) 9 | mcnv = getMatConvNetVersion(matconvnet_path); 10 | if ~strcmp(mcnv, '1.0-beta20') 11 | error('Your MatConvNet version (%s) is not the required version 1.0-beta20. 
Please download and compile the right version.', mcnv); 12 | end 13 | 14 | % if everything is fine, then set up 15 | run(fullfile(matconvnet_path, 'matlab', 'vl_setupnn.m')); 16 | 17 | 18 | 19 | function versionName = getMatConvNetVersion(matconvnet_path) 20 | 21 | fid = fopen(fullfile(matconvnet_path, 'Makefile'), 'rt'); 22 | s = textscan(fid, '%s', 'delimiter', '\n'); 23 | fclose(fid); 24 | idxs = find(~cellfun(@isempty,strfind(s{1}, 'VER = '))); 25 | mcnVersion = s{1}(idxs(1)); 26 | versionName = mcnVersion{1}(7:end); -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/matlab/error_metrics.m: -------------------------------------------------------------------------------- 1 | function results = error_metrics(pred, gt, mask) 2 | 3 | % Compute error metrics on benchmark datasets 4 | % ------------------------------------------------------------------------- 5 | 6 | % make sure predictions and ground truth have same dimensions 7 | if size(pred) ~= size(gt) 8 | pred = imresize(pred, [size(gt,1), size(gt,2)], 'bilinear'); 9 | end 10 | 11 | if isempty(mask) 12 | n_pxls = numel(gt); 13 | else 14 | n_pxls = sum(mask(:)); % average over valid pixels only 15 | end 16 | 17 | fprintf('\n Errors computed over the entire test set \n'); 18 | fprintf('------------------------------------------\n'); 19 | 20 | % Mean Absolute Relative Error 21 | rel = abs(gt(:) - pred(:)) ./ gt(:); % compute errors 22 | rel(~mask) = 0; % mask out invalid ground truth pixels 23 | rel = sum(rel) / n_pxls; % average over all pixels 24 | fprintf('Mean Absolute Relative Error: %4f\n', rel); 25 | 26 | % Root Mean Squared Error 27 | rms = (gt(:) - pred(:)).^2; 28 | rms(~mask) = 0; 29 | rms = sqrt(sum(rms) / n_pxls); 30 | fprintf('Root Mean Squared Error: %4f\n', rms); 31 | 32 | % LOG10 Error 33 | lg10 = abs(log10(gt(:)) - log10(pred(:))); 34 | lg10(~mask) = 0; 35 | lg10 = sum(lg10) / n_pxls ; 36 | fprintf('Mean Log10 Error: %4f\n', lg10); 37 | 38 | results.rel = rel; 39 | results.rms = rms; 40 | results.log10 = lg10; 41 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/VideoToDepth.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | 4 | cd /d %~dp0 5 | 6 | echo Analysing the object depth from the video 7 | 8 | set INPUT_VIDEO= 9 | set /P INPUT_VIDEO=■Your video path: 10 | rem echo INPUT_VIDEO:%INPUT_VIDEO% 11 | 12 | IF /I "%INPUT_VIDEO%" EQU "" ( 13 | ECHO Error input. Program terminates. 14 | EXIT /B 15 | ) 16 | 17 | 18 | echo Please input the result folder after the execution of 3d-pose-baseline-vmd model. 19 | set TARGET_BASELINE_DIR= 20 | set /P TARGET_BASELINE_DIR=■3D keypoints extraction result path: 21 | rem echo TARGET_DIR:%TARGET_DIR% 22 | 23 | IF /I "%TARGET_BASELINE_DIR%" EQU "" ( 24 | ECHO Error input. Program terminates. 25 | EXIT /B 26 | ) 27 | 28 | 29 | echo -------------- 30 | set DEPTH_INTERVAL=10 31 | echo Please set the depth interval. Smaller it is, clearer the results are. 32 | echo Press Enter to set to Default: depth = 10. 33 | set /P DEPTH_INTERVAL="Depth interval: " 34 | 35 | 36 | echo -------------- 37 | echo Please input yes or no to decide whether you want to debug. 38 | echo Press Enter to set to default Debug Mode. 
39 | set VERBOSE=2 40 | set IS_DEBUG=no 41 | set /P IS_DEBUG="[yes/no]: " 42 | 43 | IF /I "%IS_DEBUG%" EQU "yes" ( 44 | set VERBOSE=3 45 | ) 46 | 47 | python tensorflow/predict_video.py --model_path tensorflow/data/NYU_FCRN.ckpt --video_path %INPUT_VIDEO% --baseline_path %TARGET_BASELINE_DIR% --interval %DEPTH_INTERVAL% --verbose %VERBOSE% 48 | 49 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/matlab/+dagnn/Combine.m: -------------------------------------------------------------------------------- 1 | classdef Combine < dagnn.ElementWise 2 | 3 | methods 4 | function outputs = forward(self, inputs, params) 5 | %double the size of feature maps, combining four responses 6 | Y = zeros(size(inputs{1},1)*2, size(inputs{1},2)*2, size(inputs{1},3), size(inputs{1},4), 'like', inputs{1}); 7 | Y(1:2:end, 1:2:end, :, :) = inputs{1}; %A 8 | Y(2:2:end, 1:2:end, :, :) = inputs{2}; %C 9 | Y(1:2:end, 2:2:end, :, :) = inputs{3}; %B 10 | Y(2:2:end, 2:2:end, :, :) = inputs{4}; %D 11 | outputs{1} = Y; 12 | end 13 | 14 | function [derInputs, derParams] = backward(self, inputs, params, derOutputs) 15 | %split the feature map into four feature maps of half size 16 | derInputs{1} = derOutputs{1}(1:2:end, 1:2:end, :, :); 17 | derInputs{2} = derOutputs{1}(2:2:end, 1:2:end, :, :); 18 | derInputs{3} = derOutputs{1}(1:2:end, 2:2:end, :, :); 19 | derInputs{4} = derOutputs{1}(2:2:end, 2:2:end, :, :); 20 | derParams = {} ; 21 | end 22 | 23 | function outputSizes = getOutputSizes(obj, inputSizes) 24 | outputSizes{1}(1) = 2*inputSizes{1}(1); 25 | outputSizes{1}(2) = 2*inputSizes{1}(2); 26 | outputSizes{1}(3) = inputSizes{1}(3); 27 | outputSizes{1}(4) = inputSizes{1}(4); 28 | end 29 | 30 | function obj = Combine(varargin) 31 | obj.load(varargin) ; 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/procrustes.py: -------------------------------------------------------------------------------- 1 | 2 | def compute_similarity_transform(X, Y, compute_optimal_scale=False): 3 | """ 4 | A port of MATLAB's `procrustes` function to Numpy. 5 | Adapted from http://stackoverflow.com/a/18927641/1884420 6 | 7 | Args 8 | X: array NxM of targets, with N number of points and M point dimensionality 9 | Y: array NxM of inputs 10 | compute_optimal_scale: whether we compute optimal scale or force it to be 1 11 | 12 | Returns: 13 | d: squared error after transformation 14 | Z: transformed Y 15 | T: computed rotation 16 | b: scaling 17 | c: translation 18 | """ 19 | import numpy as np 20 | 21 | muX = X.mean(0) 22 | muY = Y.mean(0) 23 | 24 | X0 = X - muX 25 | Y0 = Y - muY 26 | 27 | ssX = (X0**2.).sum() 28 | ssY = (Y0**2.).sum() 29 | 30 | # centred Frobenius norm 31 | normX = np.sqrt(ssX) 32 | normY = np.sqrt(ssY) 33 | 34 | # scale to equal (unit) norm 35 | X0 = X0 / normX 36 | Y0 = Y0 / normY 37 | 38 | # optimum rotation matrix of Y 39 | A = np.dot(X0.T, Y0) 40 | U,s,Vt = np.linalg.svd(A,full_matrices=False) 41 | V = Vt.T 42 | T = np.dot(V, U.T) 43 | 44 | # Make sure we have a rotation 45 | detT = np.linalg.det(T) 46 | V[:,-1] *= np.sign( detT ) 47 | s[-1] *= np.sign( detT ) 48 | T = np.dot(V, U.T) 49 | 50 | traceTA = s.sum() 51 | 52 | if compute_optimal_scale: # Compute optimum scaling of Y. 
53 | b = traceTA * normX / normY 54 | d = 1 - traceTA**2 55 | Z = normX*traceTA*np.dot(Y0, T) + muX 56 | else: # If no scaling allowed 57 | b = 1 58 | d = 1 + ssY/ssX - 2 * traceTA * normY / normX 59 | Z = normY*np.dot(Y0, T) + muX 60 | 61 | c = muX - b*np.dot(muY, T) 62 | 63 | return d, Z, T, b, c 64 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Dec 20 17:39 2016 5 | 6 | @author: Denis Tome' 7 | """ 8 | 9 | import __init__ 10 | 11 | from lifting import PoseEstimator 12 | from lifting.utils import draw_limbs 13 | from lifting.utils import plot_pose 14 | 15 | import cv2 16 | import matplotlib.pyplot as plt 17 | from os.path import dirname, realpath 18 | 19 | DIR_PATH = dirname(realpath(__file__)) 20 | PROJECT_PATH = realpath(DIR_PATH + '/..') 21 | IMAGE_FILE_PATH = PROJECT_PATH + '/data/images/test_image.png' 22 | SAVED_SESSIONS_DIR = PROJECT_PATH + '/data/saved_sessions' 23 | SESSION_PATH = SAVED_SESSIONS_DIR + '/init_session/init' 24 | PROB_MODEL_PATH = SAVED_SESSIONS_DIR + '/prob_model/prob_model_params.mat' 25 | 26 | 27 | def main(): 28 | image = cv2.imread(IMAGE_FILE_PATH) 29 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # conversion to rgb 30 | 31 | # create pose estimator 32 | image_size = image.shape 33 | 34 | pose_estimator = PoseEstimator(image_size, SESSION_PATH, PROB_MODEL_PATH) 35 | 36 | # load model 37 | pose_estimator.initialise() 38 | 39 | # estimation 40 | pose_2d, visibility, pose_3d = pose_estimator.estimate(image) 41 | 42 | # close model 43 | pose_estimator.close() 44 | 45 | # Show 2D and 3D poses 46 | display_results(image, pose_2d, visibility, pose_3d) 47 | 48 | 49 | def display_results(in_image, data_2d, joint_visibility, data_3d): 50 | """Plot 2D and 3D poses for each of the people in the image.""" 51 | plt.figure() 52 | draw_limbs(in_image, data_2d, joint_visibility) 53 | plt.imshow(in_image) 54 | plt.axis('off') 55 | 56 | # Show 3D poses 57 | for single_3D in data_3d: 58 | # or plot_pose(Prob3dPose.centre_all(single_3D)) 59 | plot_pose(single_3D) 60 | 61 | plt.show() 62 | 63 | if __name__ == '__main__': 64 | import sys 65 | sys.exit(main()) 66 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import numpy as np 4 | import tensorflow as tf 5 | from matplotlib import pyplot as plt 6 | from PIL import Image 7 | 8 | import models 9 | 10 | def predict(model_data_path, image_path): 11 | 12 | 13 | # Default input size 14 | height = 228 15 | width = 304 16 | channels = 3 17 | batch_size = 1 18 | 19 | # Read image 20 | img = Image.open(image_path) 21 | img = img.resize([width,height], Image.ANTIALIAS) 22 | img = np.array(img).astype('float32') 23 | img = np.expand_dims(np.asarray(img), axis = 0) 24 | 25 | # Create a placeholder for the input image 26 | input_node = tf.placeholder(tf.float32, shape=(None, height, width, channels)) 27 | 28 | # Construct the network 29 | net = models.ResNet50UpProj({'data': input_node}, batch_size, 1, False) 30 | 31 | with tf.Session() as sess: 32 | 33 | # Load the converted parameters 34 | print('Loading the model') 35 | 36 | # Use to load from ckpt file 37 | saver = 
tf.train.Saver() 38 | saver.restore(sess, model_data_path) 39 | 40 | # Use to load from npy file 41 | #net.load(model_data_path, sess) 42 | 43 | # Evaluate the network for the given image 44 | pred = sess.run(net.get_output(), feed_dict={input_node: img}) 45 | 46 | # Plot result 47 | fig = plt.figure() 48 | ii = plt.imshow(pred[0,:,:,0], interpolation='nearest') 49 | fig.colorbar(ii) 50 | plt.show() 51 | 52 | return pred 53 | 54 | 55 | def main(): 56 | # Parse arguments 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument('model_path', help='Converted parameters for the model') 59 | parser.add_argument('image_paths', help='Directory of images to predict') 60 | args = parser.parse_args() 61 | 62 | # Predict the image 63 | pred = predict(args.model_path, args.image_paths) 64 | 65 | os._exit(0) 66 | 67 | if __name__ == '__main__': 68 | main() 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenMMD 2 | OpenMMD can be thought of as OpenPose + MikuMikuDance (MMD). It is an OpenPose-based deep-learning project that converts the motion in a real-person video directly into an MMD VMD motion file (for models such as Miku or Anmicius). 3 | 4 | In short: record a video of a person in motion, and this project extracts a motion file reproducing the movements in the video, with no professional motion-capture equipment required. 5 | 6 | ## Requirements 7 | * Operating system: Windows 8.0 or later 8 | * Python version >= 3.6 9 | * NVIDIA graphics card with at least 1.6 GB available 10 | * At least 2.5 GB of free RAM memory for BODY_25 model or 2 GB for COCO model (assuming cuDNN installed). 11 | * Highly recommended: cuDNN 12 | 13 | ## Installation and usage 14 | * Download the model archive data.zip and the OpenPose tools utils.zip, then extract them into the project root ([Baidu Netdisk](https://pan.baidu.com/s/1V4ldr5FuwpWAHgaUv9hhYA), extraction code: 1dmt). 15 | * pip install -r requirements.txt 16 | * python launch.py 17 | 18 | ## Related projects and references 19 | * [peterljq/OpenMMD](https://github.com/peterljq/OpenMMD) 20 | 21 | OpenMMD is an OpenPose-based application that can convert real-person videos to the motion files (.vmd) which directly implement the 3D model (e.g. Miku, Anmicius) animated movies. 22 | 23 | 24 | * [CMU-Perceptual-Computing-Lab/openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) 25 | 26 | OpenPose has represented the first real-time multi-person system to jointly detect human body, hand, facial, and foot keypoints (in total 135 keypoints) on single images. 27 | 28 | It is authored by Ginés Hidalgo, Zhe Cao, Tomas Simon, Shih-En Wei, Yaadhav Raaj, Hanbyul Joo, and Yaser Sheikh. It is maintained by Ginés Hidalgo and Yaadhav Raaj. OpenPose would not be possible without the CMU Panoptic Studio dataset. We would also like to thank all the people who have helped OpenPose in any way. 29 | 30 | 31 | * [una-dinosauria/3d-pose-baseline](https://github.com/una-dinosauria/3d-pose-baseline) 32 | 33 | This is the code for the paper 34 | 35 | Julieta Martinez, Rayat Hossain, Javier Romero, James J. Little. A simple yet effective baseline for 3d human pose estimation. In ICCV, 2017. [https://arxiv.org/pdf/1705.03098.pdf](https://arxiv.org/pdf/1705.03098.pdf). 36 | 37 | The code in this repository was mostly written by Julieta Martinez, Rayat Hossain and Javier Romero.
38 | 39 | 40 | * [iro-cp/FCRN-DepthPrediction](https://github.com/iro-cp/FCRN-DepthPrediction) 41 | 42 | Deeper Depth Prediction with Fully Convolutional Residual Networks 43 | 44 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/VmdWriter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import struct 4 | from PyQt5.QtGui import QQuaternion, QVector3D 5 | 6 | class VmdBoneFrame(): 7 | def __init__(self, frame=0): 8 | self.name = '' 9 | self.frame = frame 10 | self.position = QVector3D(0, 0, 0) 11 | self.rotation = QQuaternion() 12 | 13 | def write(self, fout): 14 | fout.write(self.name) 15 | fout.write(bytearray([0 for i in range(len(self.name), 15)])) # ボーン名15Byteの残りを\0で埋める 16 | fout.write(struct.pack('3 ): 90 | dump_file = sys.argv[3] 91 | 92 | vmdlifting(image_file, vmd_file, dump_file) 93 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/matlab/DepthMapPrediction.m: -------------------------------------------------------------------------------- 1 | function pred = DepthMapPrediction(imdb, net, varargin) 2 | 3 | % Depth prediction (inference) using a trained model. 4 | % Inputs (imdb) can be either from the NYUDepth_v2 or Make3D dataset, along 5 | % with the corresponding trained model (net). Additionally, the evaluation 6 | % can be run for any single image. MatConvNet library has to be already 7 | % setup for this function to work properly. 8 | % ------------------------------------------------------------------------- 9 | % Inputs: 10 | % - imdb: a structure with fields 'images' and 'depths' in the case of 11 | % the benchmark datasets with known ground truth. imdb could 12 | % alternatively be any single RGB image of size NxMx3 in [0,255] 13 | % or a tensor of D input images NxMx3xD. 14 | % - net: a trained model of type struct (suitable to be converted to a 15 | % DagNN object and successively processed using the DagNN 16 | % wrapper). For testing on arbitrary images, use NYU model for 17 | % indoor and Make3D model for outdoor scenes respectively. 
18 | % ------------------------------------------------------------------------- 19 | 20 | opts.gpu = false; % Set to true (false) for GPU (CPU only) support 21 | opts.plot = false; % Set to true to visualize the predictions during inference 22 | opts = vl_argparse(opts, varargin); 23 | 24 | % Set network properties 25 | net = dagnn.DagNN.loadobj(net); 26 | net.mode = 'test'; 27 | out = net.getVarIndex('prediction'); 28 | if opts.gpu 29 | net.move('gpu'); 30 | end 31 | 32 | % Check input 33 | if isa(imdb, 'struct') 34 | % case of benchmark datasets (NYU, Make3D) 35 | images = imdb.images; 36 | groundTruth = imdb.depths; 37 | else 38 | % case of arbitrary image(s) 39 | images = imdb; 40 | images = imresize(images, net.meta.normalization.imageSize(1:2)); 41 | groundTruth = []; 42 | end 43 | 44 | % Get output size for initialization 45 | varSizes = net.getVarSizes({'data', net.meta.normalization.imageSize}); % get variable sizes 46 | pred = zeros(varSizes{out}(1), varSizes{out}(2), varSizes{out}(3), size(images, 4)); % initiliaze 47 | 48 | if opts.plot, figure(); end 49 | 50 | fprintf('predicting...\n'); 51 | for i = 1:size(images, 4) 52 | % get input image 53 | im = single(images(:,:,:,i)); 54 | if opts.gpu 55 | im = gpuArray(im); 56 | end 57 | 58 | % run the CNN 59 | inputs = {'data', im}; 60 | net.eval(inputs) ; 61 | 62 | % obtain prediction 63 | pred(:,:,i) = gather(net.vars(out).value); 64 | 65 | % visualize results 66 | if opts.plot 67 | colormap jet 68 | if ~isempty(groundTruth) 69 | subplot(1,3,1), imagesc(uint8(images(:,:,:,i))), title('RGB Input'), axis off 70 | subplot(1,3,2), imagesc(groundTruth(:,:,i)), title('Depth Ground Truth'), axis off 71 | subplot(1,3,3), imagesc(pred(:,:,i)), title('Depth Prediction'), axis off 72 | else 73 | subplot(1,2,1), imagesc(uint8(images(:,:,:,i))), title('RGB Input'), axis off 74 | subplot(1,2,2), imagesc(pred(:,:,i)), title('Depth Prediction'), axis off 75 | end 76 | drawnow; 77 | end 78 | end 79 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/packages/lifting/utils/draw.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mar 23 15:04 2017 4 | 5 | @author: Denis Tome' 6 | """ 7 | import cv2 8 | import numpy as np 9 | from .config import JOINT_DRAW_SIZE 10 | from .config import LIMB_DRAW_SIZE 11 | from .config import NORMALISATION_COEFFICIENT 12 | import matplotlib.pyplot as plt 13 | import math 14 | 15 | __all__ = [ 16 | 'draw_limbs', 17 | 'plot_pose' 18 | ] 19 | 20 | 21 | def draw_limbs(image, pose_2d, visible): 22 | """Draw the 2D pose without the occluded/not visible joints.""" 23 | 24 | _COLORS = [ 25 | [0, 0, 255], [0, 170, 255], [0, 255, 170], [0, 255, 0], 26 | [170, 255, 0], [255, 170, 0], [255, 0, 0], [255, 0, 170], 27 | [170, 0, 255] 28 | ] 29 | _LIMBS = np.array([0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 30 | 9, 10, 11, 12, 12, 13]).reshape((-1, 2)) 31 | 32 | _NORMALISATION_FACTOR = int(math.floor(math.sqrt(image.shape[0] * image.shape[1] / NORMALISATION_COEFFICIENT))) 33 | 34 | for oid in range(pose_2d.shape[0]): 35 | for lid, (p0, p1) in enumerate(_LIMBS): 36 | if not (visible[oid][p0] and visible[oid][p1]): 37 | continue 38 | y0, x0 = pose_2d[oid][p0] 39 | y1, x1 = pose_2d[oid][p1] 40 | cv2.circle(image, (x0, y0), JOINT_DRAW_SIZE *_NORMALISATION_FACTOR , _COLORS[lid], -1) 41 | cv2.circle(image, (x1, y1), JOINT_DRAW_SIZE*_NORMALISATION_FACTOR , _COLORS[lid], -1) 42 | cv2.line(image, 
(x0, y0), (x1, y1), 43 | _COLORS[lid], LIMB_DRAW_SIZE*_NORMALISATION_FACTOR , 16) 44 | 45 | 46 | def plot_pose(pose): 47 | """Plot the 3D pose showing the joint connections.""" 48 | import mpl_toolkits.mplot3d.axes3d as p3 49 | 50 | _CONNECTION = [ 51 | [0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8], 52 | [8, 9], [9, 10], [8, 11], [11, 12], [12, 13], [8, 14], [14, 15], 53 | [15, 16]] 54 | 55 | def joint_color(j): 56 | """ 57 | TODO: 'j' shadows name 'j' from outer scope 58 | """ 59 | 60 | colors = [(0, 0, 0), (255, 0, 255), (0, 0, 255), 61 | (0, 255, 255), (255, 0, 0), (0, 255, 0)] 62 | _c = 0 63 | if j in range(1, 4): 64 | _c = 1 65 | if j in range(4, 7): 66 | _c = 2 67 | if j in range(9, 11): 68 | _c = 3 69 | if j in range(11, 14): 70 | _c = 4 71 | if j in range(14, 17): 72 | _c = 5 73 | return colors[_c] 74 | 75 | assert (pose.ndim == 2) 76 | assert (pose.shape[0] == 3) 77 | fig = plt.figure() 78 | ax = fig.gca(projection='3d') 79 | for c in _CONNECTION: 80 | col = '#%02x%02x%02x' % joint_color(c[0]) 81 | ax.plot([pose[0, c[0]], pose[0, c[1]]], 82 | [pose[1, c[0]], pose[1, c[1]]], 83 | [pose[2, c[0]], pose[2, c[1]]], c=col) 84 | for j in range(pose.shape[1]): 85 | col = '#%02x%02x%02x' % joint_color(j) 86 | ax.scatter(pose[0, j], pose[1, j], pose[2, j], 87 | c=col, marker='o', edgecolor=col) 88 | smallest = pose.min() 89 | largest = pose.max() 90 | ax.set_xlim3d(smallest, largest) 91 | ax.set_ylim3d(smallest, largest) 92 | ax.set_zlim3d(smallest, largest) 93 | 94 | return fig 95 | 96 | 97 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/viz.py: -------------------------------------------------------------------------------- 1 | 2 | """Functions to visualize human poses""" 3 | 4 | import matplotlib.pyplot as plt 5 | from . import data_utils 6 | import numpy as np 7 | import h5py 8 | import os 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | def show3Dpose(channels, ax, lcolor="#3498db", rcolor="#e74c3c", add_labels=False): # blue, orange 12 | """ 13 | Visualize a 3d skeleton 14 | 15 | Args 16 | channels: 96x1 vector. The pose to plot. 17 | ax: matplotlib 3d axis to draw on 18 | lcolor: color for left part of the body 19 | rcolor: color for right part of the body 20 | add_labels: whether to add coordinate labels 21 | Returns 22 | Nothing. Draws on ax. 
23 | """ 24 | 25 | assert channels.size == len(data_utils.H36M_NAMES)*3, "channels should have 96 entries, it has %d instead" % channels.size 26 | vals = np.reshape( channels, (len(data_utils.H36M_NAMES), -1) ) 27 | 28 | I = np.array([1,2,3,1,7,8,1, 13,14,15,14,18,19,14,26,27])-1 # start points 29 | J = np.array([2,3,4,7,8,9,13,14,15,16,18,19,20,26,27,28])-1 # end points 30 | LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool) 31 | # Make connection matrix 32 | for i in np.arange( len(I) ): 33 | x, y, z = [np.array( [vals[I[i], j], vals[J[i], j]] ) for j in range(3)] 34 | ax.plot(x, y, z, marker='o', markersize=2, lw=1, c=lcolor if LR[i] else rcolor) 35 | 36 | RADIUS = 750 # space around the subject 37 | xroot, yroot, zroot = vals[0,0], vals[0,1], vals[0,2] 38 | ax.set_xlim3d([-RADIUS+xroot, RADIUS+xroot]) 39 | ax.set_zlim3d([-RADIUS+zroot, RADIUS+zroot]) 40 | ax.set_ylim3d([-RADIUS+yroot, RADIUS+yroot]) 41 | 42 | if add_labels: 43 | ax.set_xlabel("x") 44 | ax.set_ylabel("y") 45 | ax.set_zlabel("z") 46 | 47 | # Get rid of the ticks and tick labels 48 | ax.set_xticks([]) 49 | ax.set_yticks([]) 50 | ax.set_zticks([]) 51 | 52 | ax.get_xaxis().set_ticklabels([]) 53 | ax.get_yaxis().set_ticklabels([]) 54 | ax.set_zticklabels([]) 55 | ax.set_aspect('equal') 56 | 57 | # Get rid of the panes (actually, make them white) 58 | white = (1.0, 1.0, 0.1, 0.0) 59 | ax.w_xaxis.set_pane_color(white) 60 | ax.w_yaxis.set_pane_color(white) 61 | # Keep z pane 62 | 63 | # Get rid of the lines in 3d 64 | ax.w_xaxis.line.set_color(white) 65 | ax.w_yaxis.line.set_color(white) 66 | ax.w_zaxis.line.set_color(white) 67 | 68 | def show2Dpose(channels, ax, lcolor="#3498db", rcolor="#e74c3c", add_labels=False): 69 | """ 70 | Visualize a 2d skeleton 71 | 72 | Args 73 | channels: 64x1 vector. The pose to plot. 74 | ax: matplotlib axis to draw on 75 | lcolor: color for left part of the body 76 | rcolor: color for right part of the body 77 | add_labels: whether to add coordinate labels 78 | Returns 79 | Nothing. Draws on ax. 
80 | """ 81 | 82 | assert channels.size == len(data_utils.H36M_NAMES)*2, "channels should have 64 entries, it has %d instead" % channels.size 83 | vals = np.reshape( channels, (len(data_utils.H36M_NAMES), -1) ) 84 | 85 | I = np.array([1,2,3,1,7,8,1, 13,14,14,18,19,14,26,27])-1 # start points 86 | J = np.array([2,3,4,7,8,9,13,14,16,18,19,20,26,27,28])-1 # end points 87 | LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool) 88 | 89 | # Make connection matrix 90 | for i in np.arange( len(I) ): 91 | x, y = [np.array( [vals[I[i], j], vals[J[i], j]] ) for j in range(2)] 92 | ax.plot(x, y, lw=2, c=lcolor if LR[i] else rcolor) 93 | 94 | # Get rid of the ticks 95 | ax.set_xticks([]) 96 | ax.set_yticks([]) 97 | 98 | # Get rid of tick labels 99 | ax.get_xaxis().set_ticklabels([]) 100 | ax.get_yaxis().set_ticklabels([]) 101 | 102 | RADIUS = 350 # space around the subject 103 | xroot, yroot = vals[0,0], vals[0,1] 104 | ax.set_xlim([-RADIUS+xroot, RADIUS+xroot]) 105 | ax.set_ylim([-RADIUS+yroot, RADIUS+yroot]) 106 | if add_labels: 107 | ax.set_xlabel("x") 108 | ax.set_ylabel("z") 109 | 110 | ax.set_aspect('equal') 111 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/vmdlifting_multi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # vmdlifting.py - estimate 3D pose by "Lifting-from-the-Deep", and convert the pose data to VMD 5 | # 6 | # This program is derived from demo.py in Lifting-from-the-Deep which is created by Denis Tome' 7 | 8 | from __future__ import print_function 9 | 10 | def usage(prog): 11 | print('usage: ' + prog + ' IMAGE_FILE VMD_FILE [POSITION_FILE]') 12 | sys.exit() 13 | 14 | import __init__ 15 | 16 | from lifting import PoseEstimator 17 | from lifting.utils import draw_limbs 18 | from lifting.utils import plot_pose 19 | 20 | import cv2 21 | import matplotlib.pyplot as plt 22 | from os.path import dirname, realpath 23 | from pos2vmd_multi import pos2vmd_multi 24 | from head_face import head_face_estimation 25 | 26 | DIR_PATH = dirname(realpath(__file__)) 27 | PROJECT_PATH = realpath(DIR_PATH + '/..') 28 | SAVED_SESSIONS_DIR = PROJECT_PATH + '/data/saved_sessions' 29 | SESSION_PATH = SAVED_SESSIONS_DIR + '/init_session/init' 30 | PROB_MODEL_PATH = SAVED_SESSIONS_DIR + '/prob_model/prob_model_params.mat' 31 | 32 | def vmdlifting_multi(video_file, vmd_file, position_file): 33 | video_file_path = realpath(video_file) 34 | 35 | cap = cv2.VideoCapture(video_file_path) 36 | 37 | pose_3d_list = [] 38 | head_rotation_list = [] 39 | expression_frames_list = [] 40 | idx = 0 41 | while(cap.isOpened()): 42 | # Capture frame-by-frame 43 | ret, frame = cap.read() 44 | 45 | # 読み込みがなければ終了 46 | if ret == False: 47 | break 48 | 49 | print("frame load idx={0}".format(idx)) 50 | 51 | image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # conversion to rgb 52 | 53 | # 念のため、フレーム画像出力 54 | image_file_path = "{0}/frame_{1:012d}.png".format(dirname(video_file_path), idx) 55 | cv2.imwrite(image_file_path,image) 56 | 57 | # create pose estimator 58 | image_size = image.shape 59 | 60 | pose_estimator = PoseEstimator(image_size, SESSION_PATH, PROB_MODEL_PATH) 61 | 62 | # load model 63 | pose_estimator.initialise() 64 | 65 | # estimation 66 | pose_2d, visibility, pose_3d = pose_estimator.estimate(image) 67 | 68 | # close model 69 | pose_estimator.close() 70 | 71 | if (position_file is not None): 72 | # dump 3d joint position 
data to position_file 73 | fout = open(position_file, "w") 74 | for pose in pose_3d: 75 | for j in range(pose.shape[1]): 76 | print(j, pose[0, j], pose[1, j], pose[2, j], file=fout) 77 | fout.close() 78 | 79 | # head position & face expression 80 | head_rotation, expression_frames = head_face_estimation(image_file_path) 81 | head_rotation_list.append(head_rotation) 82 | expression_frames_list.append(expression_frames) 83 | 84 | pose_3d_list.append(pose_3d) 85 | 86 | idx += 1 87 | 88 | # When everything done, release the capture 89 | cap.release() 90 | 91 | pos2vmd_multi(pose_3d_list, vmd_file, head_rotation_list, expression_frames_list) 92 | 93 | # Show 2D and 3D poses 94 | # display_results(image, pose_2d, visibility, pose_3d) 95 | 96 | 97 | def display_results(in_image, data_2d, joint_visibility, data_3d): 98 | """Plot 2D and 3D poses for each of the people in the image.""" 99 | plt.figure() 100 | draw_limbs(in_image, data_2d, joint_visibility) 101 | plt.imshow(in_image) 102 | plt.axis('off') 103 | 104 | # Show 3D poses 105 | for single_3D in data_3d: 106 | # or plot_pose(Prob3dPose.centre_all(single_3D)) 107 | plot_pose(single_3D) 108 | 109 | plt.show() 110 | 111 | if __name__ == '__main__': 112 | import sys 113 | if (len(sys.argv) < 3): 114 | usage(sys.argv[0]) 115 | 116 | video_file = sys.argv[1] 117 | vmd_file = sys.argv[2] 118 | dump_file = None 119 | if (len(sys.argv) >3 ): 120 | dump_file = sys.argv[3] 121 | 122 | vmdlifting_multi(video_file, vmd_file, dump_file) 123 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/cameras.py: -------------------------------------------------------------------------------- 1 | 2 | """Utilities to deal with the cameras of human3.6m""" 3 | 4 | from __future__ import division 5 | 6 | import os 7 | import h5py 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | import matplotlib.image as mpimg 11 | from . import data_utils 12 | from . 
import viz 13 | 14 | def project_point_radial( P, R, T, f, c, k, p ): 15 | """ 16 | Project points from 3d to 2d using camera parameters 17 | including radial and tangential distortion 18 | 19 | Args 20 | P: Nx3 points in world coordinates 21 | R: 3x3 Camera rotation matrix 22 | T: 3x1 Camera translation parameters 23 | f: (scalar) Camera focal length 24 | c: 2x1 Camera center 25 | k: 3x1 Camera radial distortion coefficients 26 | p: 2x1 Camera tangential distortion coefficients 27 | Returns 28 | Proj: Nx2 points in pixel space 29 | D: 1xN depth of each point in camera space 30 | radial: 1xN radial distortion per point 31 | tan: 1xN tangential distortion per point 32 | r2: 1xN squared radius of the projected points before distortion 33 | """ 34 | 35 | # P is a matrix of 3-dimensional points 36 | assert len(P.shape) == 2 37 | assert P.shape[1] == 3 38 | 39 | N = P.shape[0] 40 | X = R.dot( P.T - T ) # rotate and translate 41 | XX = X[:2,:] / X[2,:] 42 | r2 = XX[0,:]**2 + XX[1,:]**2 43 | 44 | radial = 1 + np.einsum( 'ij,ij->j', np.tile(k,(1, N)), np.array([r2, r2**2, r2**3]) ); 45 | tan = p[0]*XX[1,:] + p[1]*XX[0,:] 46 | 47 | XXX = XX * np.tile(radial+tan,(2,1)) + np.outer(np.array([p[1], p[0]]).reshape(-1), r2 ) 48 | 49 | Proj = (f * XXX) + c 50 | Proj = Proj.T 51 | 52 | D = X[2,] 53 | 54 | return Proj, D, radial, tan, r2 55 | 56 | def world_to_camera_frame(P, R, T): 57 | """ 58 | Convert points from world to camera coordinates 59 | 60 | Args 61 | P: Nx3 3d points in world coordinates 62 | R: 3x3 Camera rotation matrix 63 | T: 3x1 Camera translation parameters 64 | Returns 65 | X_cam: Nx3 3d points in camera coordinates 66 | """ 67 | 68 | assert len(P.shape) == 2 69 | assert P.shape[1] == 3 70 | 71 | X_cam = R.dot( P.T - T ) # rotate and translate 72 | 73 | return X_cam.T 74 | 75 | def camera_to_world_frame(P, R, T): 76 | """Inverse of world_to_camera_frame 77 | 78 | Args 79 | P: Nx3 points in camera coordinates 80 | R: 3x3 Camera rotation matrix 81 | T: 3x1 Camera translation parameters 82 | Returns 83 | X_cam: Nx3 points in world coordinates 84 | """ 85 | 86 | assert len(P.shape) == 2 87 | assert P.shape[1] == 3 88 | 89 | X_cam = R.T.dot( P.T ) + T # rotate and translate 90 | 91 | return X_cam.T 92 | 93 | def load_camera_params( hf, path ): 94 | """Load h36m camera parameters 95 | 96 | Args 97 | hf: hdf5 open file with h36m cameras data 98 | path: path or key inside hf to the camera we are interested in 99 | Returns 100 | R: 3x3 Camera rotation matrix 101 | T: 3x1 Camera translation parameters 102 | f: (scalar) Camera focal length 103 | c: 2x1 Camera center 104 | k: 3x1 Camera radial distortion coefficients 105 | p: 2x1 Camera tangential distortion coefficients 106 | name: String with camera id 107 | """ 108 | 109 | R = hf[ path.format('R') ][:] 110 | R = R.T 111 | 112 | T = hf[ path.format('T') ][:] 113 | f = hf[ path.format('f') ][:] 114 | c = hf[ path.format('c') ][:] 115 | k = hf[ path.format('k') ][:] 116 | p = hf[ path.format('p') ][:] 117 | 118 | name = hf[ path.format('Name') ][:] 119 | name = "".join( [chr(item) for item in name] ) 120 | 121 | return R, T, f, c, k, p, name 122 | 123 | def load_cameras( bpath='cameras.h5', subjects=[1,5,6,7,8,9,11] ): 124 | """Loads the cameras of h36m 125 | 126 | Args 127 | bpath: path to hdf5 file with h36m camera data 128 | subjects: List of ints representing the subject IDs for which cameras are requested 129 | Returns 130 | rcams: dictionary of 4 tuples per subject ID containing its camera parameters for the 4 h36m cams 131 | """ 132 | rcams = 
{} 133 | 134 | with h5py.File(bpath,'r') as hf: 135 | for s in subjects: 136 | for c in range(4): # There are 4 cameras in human3.6m 137 | rcams[(s, c+1)] = load_camera_params(hf, 'subject%d/camera%d/{0}' % (s,c+1) ) 138 | 139 | return rcams 140 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/matlab/evaluateNYU.m: -------------------------------------------------------------------------------- 1 | function evaluateNYU 2 | 3 | % Evaluation of depth prediction on NYU Depth v2 dataset. 4 | 5 | % ------------------------------------------------------------------------- 6 | % Setup MatConvNet 7 | % ------------------------------------------------------------------------- 8 | 9 | % Set your matconvnet path here: 10 | matconvnet_path = '../../matconvnet-1.0-beta20'; 11 | setupMatConvNet(matconvnet_path); 12 | 13 | % ------------------------------------------------------------------------- 14 | % Options 15 | % ------------------------------------------------------------------------- 16 | 17 | opts.dataDir = fullfile(pwd, 'NYU'); % working directory 18 | opts.interp = 'nearest'; % interpolation method applied during resizing 19 | 20 | netOpts.gpu = true; % set to true to enable GPU support 21 | netOpts.plot = true; % set to true to visualize the predictions during inference 22 | 23 | % ------------------------------------------------------------------------- 24 | % Prepate data 25 | % ------------------------------------------------------------------------- 26 | 27 | imdb = get_NYUDepth_v2(opts); 28 | net = get_model(opts); 29 | 30 | % Test set 31 | testSet.images = imdb.images(:,:,:, imdb.set == 2); 32 | testSet.depths = imdb.depths(:,:, imdb.set == 2); 33 | 34 | % Prepare input for evaluation through the network, in accordance to the 35 | % way the model was trained for the NYU dataset. No processing is applied 36 | % to the ground truth. 
37 | meta = net.meta.normalization; % information about input 38 | res = meta.imageSize(1:2) + 2*meta.border; 39 | testSet.images = imresize(testSet.images, res, opts.interp); % resize 40 | testSet.images = testSet.images(1+meta.border(1):end-meta.border(1), 1+meta.border(2):end-meta.border(2), :, :); % center crop 41 | 42 | % ------------------------------------------------------------------------- 43 | % Evaluate network 44 | % ------------------------------------------------------------------------- 45 | 46 | % Get predictions 47 | predictions = DepthMapPrediction(testSet, net, netOpts); 48 | predictions = squeeze(predictions); % remove singleton dimensions 49 | predictions = imresize(predictions, [size(testSet.depths,1), size(testSet.depths,2)], 'bilinear'); %rescale 50 | 51 | % Error calculation 52 | errors = error_metrics(predictions, testSet.depths, []); 53 | 54 | % Save results 55 | fprintf('\nsaving predictions...'); 56 | save(fullfile(opts.dataDir, 'results.mat'), 'predictions', 'errors', '-v7.3'); 57 | fprintf('done!\n'); 58 | 59 | 60 | 61 | function imdb = get_NYUDepth_v2(opts) 62 | % ------------------------------------------------------------------------- 63 | % Download required data 64 | % ------------------------------------------------------------------------- 65 | 66 | opts.dataDir = fullfile(opts.dataDir, 'data'); 67 | if ~exist(opts.dataDir, 'dir'), mkdir(opts.dataDir); end 68 | 69 | % Download dataset 70 | filename = fullfile(opts.dataDir, 'nyu_depth_v2_labeled.mat'); 71 | if ~exist(filename, 'file') 72 | url = 'http://horatio.cs.nyu.edu/mit/silberman/nyu_depth_v2/nyu_depth_v2_labeled.mat'; 73 | fprintf('downloading dataset (~2.8 GB): %s\n', url); 74 | websave(filename, url); 75 | end 76 | 77 | % Download official train/test split 78 | filename_splits = fullfile(opts.dataDir, 'splits.mat'); 79 | if ~exist(filename_splits, 'file') 80 | url_split = 'http://horatio.cs.nyu.edu/mit/silberman/indoor_seg_sup/splits.mat'; 81 | fprintf('downloading train/test split: %s\n', url_split); 82 | websave(filename_splits, url_split); 83 | end 84 | 85 | % Load dataset and splits 86 | fprintf('loading data to workspace...'); 87 | data = load(filename); 88 | splits = load(filename_splits); 89 | 90 | % Store necessary information to imdb structure 91 | imdb.images = single(data.images); %(no mean subtraction has been performed) 92 | imdb.depths = single(data.depths); %depth filled-in values 93 | imdb.set(splits.trainNdxs) = 1; %training indices (ignored for inference) 94 | imdb.set(splits.testNdxs) = 2; %testing indices (on which evaluation is performed) 95 | fprintf(' done!\n'); 96 | 97 | 98 | 99 | function net = get_model(opts) 100 | % ------------------------------------------------------------------------- 101 | % Download trained models 102 | % ------------------------------------------------------------------------- 103 | 104 | opts.dataDir = fullfile(opts.dataDir, 'models'); 105 | if ~exist(opts.dataDir, 'dir'), mkdir(opts.dataDir); end 106 | 107 | filename = fullfile(opts.dataDir, 'NYU_ResNet-UpProj.mat'); 108 | if ~exist(filename, 'file') 109 | url = 'http://campar.in.tum.de/files/rupprecht/depthpred/NYU_ResNet-UpProj.zip'; 110 | fprintf('downloading trained model: %s\n', url); 111 | unzip(url, opts.dataDir); 112 | end 113 | 114 | net = load(filename); 115 | 116 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/head_face.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # head_face.py - estimate head pose & facial expression 5 | 6 | from __future__ import print_function 7 | 8 | def usage(prog): 9 | print('usage: ' + prog + ' IMAGE_FILE') 10 | sys.exit() 11 | 12 | # You can get the trained model file from: 13 | # http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2. 14 | # Note that the license for the iBUG 300-W dataset excludes commercial use. 15 | # So you should contact Imperial College London to find out if it's OK for 16 | # you to use this model file in a commercial product. 17 | DEFAULT_PREDICTOR_PATH = 'predictor/shape_predictor_68_face_landmarks.dat' 18 | 19 | import cv2 20 | import dlib 21 | import numpy as np 22 | import os 23 | from skimage import io 24 | from PyQt5.QtGui import QQuaternion, QVector3D, QMatrix3x3 25 | 26 | # dlib の python_examples/face_landmark_detection.py を改造 27 | def face_landmark_detection(image_path, predictor_path): 28 | shape_list = [] 29 | image = io.imread(image_path) 30 | if not os.path.exists(predictor_path): 31 | print("A trained model for face landmark detection is not found.") 32 | print("You can get the trained model from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2") 33 | return shape_list 34 | predictor = dlib.shape_predictor(predictor_path) 35 | detector = dlib.get_frontal_face_detector() 36 | dets = detector(image, 1) 37 | print("Number of faces detected: {}".format(len(dets))) 38 | for k, d in enumerate(dets): 39 | print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format( 40 | k, d.left(), d.top(), d.right(), d.bottom())) 41 | # Get the landmarks/parts for the face in box d. 42 | shape = predictor(image, d) 43 | print("Nose tip: {}, Chin: {}".format(shape.part(30), shape.part(8))) 44 | print("Left eye: {}, Right eye: {}".format(shape.part(45), shape.part(36))) 45 | print("Mouth-left: {}, Mouth-right: {} ...".format(shape.part(54), shape.part(48))) 46 | shape_list.append(shape) 47 | # win = dlib.image_window() 48 | # win.clear_overlay() 49 | # win.set_image(image) 50 | # for shape in shape_list: 51 | # win.add_overlay(shape) 52 | # dlib.hit_enter_to_continue() 53 | return shape_list 54 | 55 | def head_pose_estimation(image_path, shape): 56 | image = cv2.imread(image_path) 57 | pos2d_array = [] 58 | for k in [30, 8, 45, 36, 54, 48]: 59 | pos2d_array.append((shape.part(k).x, shape.part(k).y)) 60 | pos2d = np.array(pos2d_array, dtype = "double") 61 | pos3d_ini = np.array([(0.0, 0.0, 0.0), (0.0, -6.6, -1.3), (-4.5, 3.4, -2.7), 62 | (4.5, 3.4, -2.7), (-3.0, -3.0, -2.5), (3.0, -3.0, -2.5)]) 63 | focal_length = max([image.shape[0], image.shape[1]]) 64 | print("focal_length: ", focal_length) 65 | camera = np.array([[focal_length, 0, image.shape[1] / 2], 66 | [0, focal_length, image.shape[0] / 2], 67 | [0, 0, 1]], dtype = "double") 68 | distortion = np.zeros((4, 1)) 69 | retval, rot_vec, trans_vec = cv2.solvePnP(pos3d_ini, pos2d, camera, distortion, 70 | flags = cv2.SOLVEPNP_ITERATIVE) 71 | # for debug 72 | #print("Rot_Vec: \n ", rot_vec) 73 | #print(type(rot_vec)) 74 | #print(rot_vec.shape) 75 | #print("Trans_Vec:\n ", trans_vec) 76 | # 顔の回転を求める 77 | rot_mat = cv2.Rodrigues(rot_vec)[0] 78 | proj_mat = np.array([[rot_mat[0][0], rot_mat[0][1], rot_mat[0][2], 0], 79 | [rot_mat[1][0], rot_mat[1][1], rot_mat[1][2], 0], 80 | [rot_mat[2][0], rot_mat[2][1], rot_mat[2][2], 0]], dtype="double") 81 | eulerAngles = 
cv2.decomposeProjectionMatrix(proj_mat)[6] 82 | print("eulerAngles: \n", eulerAngles) 83 | 84 | #head_rotation = QQuaternion.fromEulerAngles(eulerAngles[0], eulerAngles[1], eulerAngles[2]) 85 | head_rotation = QQuaternion.fromEulerAngles(-eulerAngles[0], eulerAngles[1], -eulerAngles[2]) 86 | print("head_rotation: ", head_rotation) 87 | return head_rotation 88 | 89 | 90 | def make_expression_frames(shape): 91 | return None 92 | 93 | 94 | def head_face_estimation(image_path, predictor_path=None): 95 | if predictor_path is None: 96 | predictor_path = DEFAULT_PREDICTOR_PATH 97 | shape_list = face_landmark_detection(image_path, predictor_path) 98 | if len(shape_list) == 0: 99 | return None, None 100 | head_rotation = head_pose_estimation(image_path, shape_list[0]) 101 | expression_frames = make_expression_frames(shape_list[0]) 102 | return head_rotation, expression_frames 103 | 104 | 105 | if __name__ == '__main__': 106 | import sys 107 | if (len(sys.argv) < 2): 108 | usage(sys.argv[0]) 109 | 110 | head_rotation, expression_frames = head_face_estimation(sys.argv[1]) 111 | 112 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/matlab/evaluateMake3D.m: -------------------------------------------------------------------------------- 1 | function evaluateMake3D 2 | 3 | % Evaluation of depth prediction on Make3D dataset. 4 | 5 | % ------------------------------------------------------------------------- 6 | % Setup MatConvNet 7 | % ------------------------------------------------------------------------- 8 | 9 | % Set your matconvnet path here: 10 | matconvnet_path = '../../matconvnet-1.0-beta20'; 11 | setupMatConvNet(matconvnet_path); 12 | 13 | % ------------------------------------------------------------------------- 14 | % Options 15 | % ------------------------------------------------------------------------- 16 | 17 | opts.dataDir = fullfile(pwd, 'Make3D'); % working directory 18 | opts.interp = 'nearest'; % interpolation method applied during resizing 19 | opts.imageSize = [460,345]; % desired image size for evaluation 20 | 21 | netOpts.gpu = true; % set to true to enable GPU support 22 | netOpts.plot = true; % set to true to visualize the predictions during inference 23 | 24 | % ------------------------------------------------------------------------- 25 | % Prepate data 26 | % ------------------------------------------------------------------------- 27 | 28 | imdb = get_Make3D(opts); 29 | net = get_model(opts); 30 | 31 | % Test set 32 | testSet.images = imdb.images(:,:,:, imdb.set == 2); 33 | testSet.depths = imdb.depths(:,:, imdb.set == 2); 34 | 35 | % resize images to input resolution (equal to round(opts.imageSize/2)) 36 | testSet.images = imresize(testSet.images, net.meta.normalization.imageSize(1:2), opts.interp); 37 | % resize depth to opts.imageSize resolution 38 | testSet.depths = imresize(testSet.depths, opts.imageSize, opts.interp); 39 | 40 | % ------------------------------------------------------------------------- 41 | % Evaluate network 42 | % ------------------------------------------------------------------------- 43 | 44 | % Get predictions 45 | predictions = DepthMapPrediction(testSet, net, netOpts); 46 | predictions = squeeze(predictions); % remove singleton dimensions 47 | predictions = imresize(predictions, [size(testSet.depths,1), size(testSet.depths,2)], 'bilinear'); %rescale 48 | 49 | % Error calculation 50 | c1_mask = testSet.depths > 0 & testSet.depths < 70; 51 | errors = 
error_metrics(predictions, testSet.depths, c1_mask); 52 | 53 | % Save results 54 | fprintf('\nsaving predictions...'); 55 | save(fullfile(opts.dataDir, 'results.mat'), 'predictions', 'errors', '-v7.3'); 56 | fprintf('done!\n'); 57 | 58 | 59 | function imdb = get_Make3D(opts) 60 | % ------------------------------------------------------------------------- 61 | % Download required data (test only) 62 | % ------------------------------------------------------------------------- 63 | 64 | opts.dataDirImages = fullfile(opts.dataDir, 'data', 'Test134'); 65 | opts.dataDirDepths = fullfile(opts.dataDir, 'data', 'Gridlaserdata'); 66 | 67 | % Download test set 68 | if ~exist(opts.dataDirImages, 'dir') 69 | fprintf('downloading Make3D testing images (~190 MB)...'); 70 | mkdir(opts.dataDirImages); 71 | untar('http://www.cs.cornell.edu/~asaxena/learningdepth/Test134.tar.gz', fileparts(opts.dataDirImages)); 72 | fprintf('done.\n'); 73 | end 74 | 75 | if ~exist(opts.dataDirDepths, 'dir') 76 | fprintf('downloading Make3D testing depth maps (~22 MB)...'); 77 | mkdir(opts.dataDirDepths); 78 | untar('http://www.cs.cornell.edu/~asaxena/learningdepth/Test134Depth.tar.gz', fileparts(opts.dataDirDepths)); 79 | fprintf('done.\n'); 80 | end 81 | 82 | fprintf('preparing testing data...'); 83 | img_files = dir(fullfile(opts.dataDirImages, 'img-*.jpg')); 84 | depth_files = dir(fullfile(opts.dataDirDepths, 'depth_sph_corr-*.mat')); 85 | 86 | % Verify that the correct number of files has been found 87 | assert(numel(img_files)==134, 'Incorrect number of Make3D test images. \n'); 88 | assert(numel(depth_files)==134, 'Incorrect number of Make3D test depths. \n'); 89 | 90 | % Read dataset files and store necessary information to imdb structure 91 | for i = 1:numel(img_files) 92 | imdb.images(:,:,:,i) = single(imread(fullfile(opts.dataDirImages, img_files(i).name))); % get RGB image 93 | gt = load(fullfile(opts.dataDirDepths, depth_files(i).name)); 94 | imdb.depths(:,:,i) = single(gt.Position3DGrid(:,:,4)); % get depth channel 95 | imdb.set(i) = 2; 96 | end 97 | fprintf(' done!\n'); 98 | 99 | 100 | 101 | function net = get_model(opts) 102 | % ------------------------------------------------------------------------- 103 | % Download trained models 104 | % ------------------------------------------------------------------------- 105 | 106 | opts.dataDir = fullfile(opts.dataDir, 'models'); 107 | if ~exist(opts.dataDir, 'dir'), mkdir(opts.dataDir); end 108 | 109 | filename = fullfile(opts.dataDir, 'Make3D_ResNet-UpProj.mat'); 110 | if ~exist(filename, 'file') 111 | url = 'http://campar.in.tum.de/files/rupprecht/depthpred/Make3D_ResNet-UpProj.zip'; 112 | fprintf('downloading trained model: %s\n', url); 113 | unzip(url, opts.dataDir); 114 | end 115 | 116 | net = load(filename); 117 | 118 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/packages/lifting/_pose_estimator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Jul 13 16:20 2017 4 | 5 | @author: Denis Tome' 6 | """ 7 | from . 
import utils 8 | import cv2 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | import abc 13 | ABC = abc.ABCMeta('ABC', (object,), {}) 14 | 15 | __all__ = [ 16 | 'PoseEstimatorInterface', 17 | 'PoseEstimator' 18 | ] 19 | 20 | 21 | class PoseEstimatorInterface(ABC): 22 | 23 | @abc.abstractmethod 24 | def initialise(self): 25 | pass 26 | 27 | @abc.abstractmethod 28 | def estimate(self, image): 29 | return 30 | 31 | @abc.abstractmethod 32 | def close(self): 33 | pass 34 | 35 | 36 | class PoseEstimator(PoseEstimatorInterface): 37 | 38 | def __init__(self, image_size, session_path, prob_model_path): 39 | """Initialising the graph in tensorflow. 40 | INPUT: 41 | image_size: Size of the image in the format (w x h x 3)""" 42 | 43 | self.session = None 44 | self.poseLifting = utils.Prob3dPose(prob_model_path) 45 | self.sess = -1 46 | self.orig_img_size = np.array(image_size) 47 | self.scale = utils.config.INPUT_SIZE / (self.orig_img_size[0] * 1.0) 48 | self.img_size = np.round( 49 | self.orig_img_size * self.scale).astype(np.int32) 50 | self.image_in = None 51 | self.heatmap_person_large = None 52 | self.pose_image_in = None 53 | self.pose_centermap_in = None 54 | self.heatmap_pose = None 55 | self.session_path = session_path 56 | 57 | def initialise(self): 58 | """Load saved model in the graph 59 | INPUT: 60 | sess_path: path to the dir containing the tensorflow saved session 61 | OUTPUT: 62 | sess: tensorflow session""" 63 | 64 | ''' 65 | TODO: _N shadows built-in name '_N' 66 | ''' 67 | _N = 16 68 | 69 | tf.reset_default_graph() 70 | with tf.variable_scope('CPM'): 71 | # placeholders for person network 72 | self.image_in = tf.placeholder( 73 | tf.float32, [1, utils.config.INPUT_SIZE, self.img_size[1], 3]) 74 | 75 | heatmap_person = utils.inference_person(self.image_in) 76 | 77 | self.heatmap_person_large = tf.image.resize_images( 78 | heatmap_person, [utils.config.INPUT_SIZE, self.img_size[1]]) 79 | 80 | # placeholders for pose network 81 | self.pose_image_in = tf.placeholder( 82 | tf.float32, 83 | [_N, utils.config.INPUT_SIZE, utils.config.INPUT_SIZE, 3]) 84 | 85 | self.pose_centermap_in = tf.placeholder( 86 | tf.float32, 87 | [_N, utils.config.INPUT_SIZE, utils.config.INPUT_SIZE, 1]) 88 | 89 | self.heatmap_pose = utils.inference_pose( 90 | self.pose_image_in, self.pose_centermap_in) 91 | 92 | sess = tf.Session() 93 | sess.run(tf.global_variables_initializer()) 94 | saver = tf.train.Saver() 95 | saver.restore(sess, self.session_path) 96 | 97 | self.session = sess 98 | 99 | def estimate(self, image): 100 | """ 101 | Estimate 2d and 3d poses on the image. 
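        (Added note — a rough usage sketch; the method names follow this file,
        while the checkpoint and prob-model paths are assumptions:
            estimator = PoseEstimator(image.shape, session_path, prob_model_path)
            estimator.initialise()   # builds the CPM graph and restores the checkpoint
            pose_2d, visibility, pose_3d = estimator.estimate(image)
            estimator.close())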
102 | INPUT: 103 | image: RGB image in the format (w x h x 3) 104 | sess: tensorflow session 105 | OUTPUT: 106 | pose_2d: 2D pose for each of the people in the image in the format 107 | (num_ppl x num_joints x 2) visibility: vector containing a bool 108 | value for each joint representing the visibility of the joint in 109 | the image (could be due to occlusions or the joint is not in the 110 | image) pose_3d: 3D pose for each of the people in the image in the 111 | format (num_ppl x 3 x num_joints) 112 | """ 113 | 114 | sess = self.session 115 | 116 | image = cv2.resize(image, (0, 0), fx=self.scale, 117 | fy=self.scale, interpolation=cv2.INTER_CUBIC) 118 | b_image = np.array(image[np.newaxis] / 255.0 - 0.5, dtype=np.float32) 119 | 120 | hmap_person = sess.run(self.heatmap_person_large, { 121 | self.image_in: b_image}) 122 | 123 | hmap_person = np.squeeze(hmap_person) 124 | centers = utils.detect_objects_heatmap(hmap_person) 125 | b_pose_image, b_pose_cmap = utils.prepare_input_posenet( 126 | b_image[0], centers, 127 | [utils.config.INPUT_SIZE, image.shape[1]], 128 | [utils.config.INPUT_SIZE, utils.config.INPUT_SIZE]) 129 | 130 | feed_dict = { 131 | self.pose_image_in: b_pose_image, 132 | self.pose_centermap_in: b_pose_cmap 133 | } 134 | _hmap_pose = sess.run(self.heatmap_pose, feed_dict) 135 | 136 | # Estimate 2D poses 137 | estimated_2d_pose, visibility = utils.detect_parts_heatmaps( 138 | _hmap_pose, centers, 139 | [utils.config.INPUT_SIZE, utils.config.INPUT_SIZE]) 140 | 141 | # Estimate 3D poses 142 | transformed_pose2d, weights = self.poseLifting.transform_joints( 143 | estimated_2d_pose.copy(), visibility) 144 | pose_3d = self.poseLifting.compute_3d(transformed_pose2d, weights) 145 | pose_2d = np.round(estimated_2d_pose / self.scale).astype(np.int32) 146 | 147 | return pose_2d, visibility, pose_3d 148 | 149 | def close(self): 150 | self.session.close() 151 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/openpose_3dpose_sandbox_realtime.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import matplotlib.gridspec as gridspec 5 | import tensorflow as tf 6 | import data_utils 7 | import viz 8 | import re 9 | import cameras 10 | import json 11 | import os 12 | from predict_3dpose import create_model 13 | import cv2 14 | import imageio 15 | import time 16 | import logging 17 | import glob 18 | FLAGS = tf.app.flags.FLAGS 19 | 20 | order = [15, 12, 25, 26, 27, 17, 18, 19, 1, 2, 3, 6, 7, 8] 21 | 22 | logging.basicConfig(level=logging.INFO) 23 | logger = logging.getLogger(__name__) 24 | 25 | 26 | 27 | def main(_): 28 | done = [] 29 | 30 | enc_in = np.zeros((1, 64)) 31 | enc_in[0] = [0 for i in range(64)] 32 | 33 | actions = data_utils.define_actions(FLAGS.action) 34 | 35 | SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11] 36 | rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS) 37 | train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions( 38 | actions, FLAGS.data_dir) 39 | train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data( 40 | actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14) 41 | 42 | device_count = {"GPU": 0} 43 | png_lib = [] 44 | with tf.Session(config=tf.ConfigProto( 45 | device_count=device_count, 46 | allow_soft_placement=True)) as 
sess: 47 | #plt.figure(3) 48 | batch_size = 128 49 | model = create_model(sess, actions, batch_size) 50 | while True: 51 | key = cv2.waitKey(1) & 0xFF 52 | #logger.info("start reading data") 53 | # check for other file types 54 | list_of_files = glob.iglob("{0}/*".format(openpose_output_dir)) # You may use iglob in Python3 55 | latest_file = "" 56 | try: 57 | latest_file = max(list_of_files, key=os.path.getctime) 58 | except ValueError: 59 | #empthy dir 60 | pass 61 | if not latest_file: 62 | continue 63 | try: 64 | _file = file_name = latest_file 65 | print (latest_file) 66 | if not os.path.isfile(_file): raise Exception("No file found!!, {0}".format(_file)) 67 | data = json.load(open(_file)) 68 | #take first person 69 | _data = data["people"][0]["pose_keypoints"] 70 | xy = [] 71 | #ignore confidence score 72 | for o in range(0,len(_data),3): 73 | xy.append(_data[o]) 74 | xy.append(_data[o+1]) 75 | 76 | frame_indx = re.findall("(\d+)", file_name) 77 | frame = int(frame_indx[0]) 78 | 79 | joints_array = np.zeros((1, 36)) 80 | joints_array[0] = [0 for i in range(36)] 81 | for o in range(len(joints_array[0])): 82 | #feed array with xy array 83 | joints_array[0][o] = xy[o] 84 | _data = joints_array[0] 85 | # mapping all body parts or 3d-pose-baseline format 86 | for i in range(len(order)): 87 | for j in range(2): 88 | # create encoder input 89 | enc_in[0][order[i] * 2 + j] = _data[i * 2 + j] 90 | for j in range(2): 91 | # Hip 92 | enc_in[0][0 * 2 + j] = (enc_in[0][1 * 2 + j] + enc_in[0][6 * 2 + j]) / 2 93 | # Neck/Nose 94 | enc_in[0][14 * 2 + j] = (enc_in[0][15 * 2 + j] + enc_in[0][12 * 2 + j]) / 2 95 | # Thorax 96 | enc_in[0][13 * 2 + j] = 2 * enc_in[0][12 * 2 + j] - enc_in[0][14 * 2 + j] 97 | 98 | # set spine 99 | spine_x = enc_in[0][24] 100 | spine_y = enc_in[0][25] 101 | 102 | enc_in = enc_in[:, dim_to_use_2d] 103 | mu = data_mean_2d[dim_to_use_2d] 104 | stddev = data_std_2d[dim_to_use_2d] 105 | enc_in = np.divide((enc_in - mu), stddev) 106 | 107 | dp = 1.0 108 | dec_out = np.zeros((1, 48)) 109 | dec_out[0] = [0 for i in range(48)] 110 | _, _, poses3d = model.step(sess, enc_in, dec_out, dp, isTraining=False) 111 | all_poses_3d = [] 112 | enc_in = data_utils.unNormalizeData(enc_in, data_mean_2d, data_std_2d, dim_to_ignore_2d) 113 | poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d) 114 | gs1 = gridspec.GridSpec(1, 1) 115 | gs1.update(wspace=-0.00, hspace=0.05) # set the spacing between axes. 
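# Added note on the block below: poses3d comes back as a flat array of
# 32 joints x (x, y, z) (1 x 96 here, one detected person). The loops that
# follow swap each joint's y and z components, remap depth as
# z' = _max - z + _min, and offset x/z by (spine_x - 630) and (500 - spine_y)
# so the skeleton stays roughly centred in the saved matplotlib figure.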
116 | plt.axis('off') 117 | all_poses_3d.append( poses3d ) 118 | enc_in, poses3d = map( np.vstack, [enc_in, all_poses_3d] ) 119 | subplot_idx, exidx = 1, 1 120 | _max = 0 121 | _min = 10000 122 | 123 | for i in range(poses3d.shape[0]): 124 | for j in range(32): 125 | tmp = poses3d[i][j * 3 + 2] 126 | poses3d[i][j * 3 + 2] = poses3d[i][j * 3 + 1] 127 | poses3d[i][j * 3 + 1] = tmp 128 | if poses3d[i][j * 3 + 2] > _max: 129 | _max = poses3d[i][j * 3 + 2] 130 | if poses3d[i][j * 3 + 2] < _min: 131 | _min = poses3d[i][j * 3 + 2] 132 | 133 | for i in range(poses3d.shape[0]): 134 | for j in range(32): 135 | poses3d[i][j * 3 + 2] = _max - poses3d[i][j * 3 + 2] + _min 136 | poses3d[i][j * 3] += (spine_x - 630) 137 | poses3d[i][j * 3 + 2] += (500 - spine_y) 138 | 139 | # Plot 3d predictions 140 | ax = plt.subplot(gs1[subplot_idx - 1], projection='3d') 141 | ax.view_init(18, -70) 142 | logger.debug(np.min(poses3d)) 143 | if np.min(poses3d) < -1000 and frame != 0: 144 | poses3d = before_pose 145 | 146 | p3d = poses3d 147 | 148 | viz.show3Dpose(p3d, ax, lcolor="#9b59b6", rcolor="#2ecc71") 149 | before_pose = poses3d 150 | pngName = 'png/test_{0}.png'.format(str(frame)) 151 | plt.savefig(pngName) 152 | 153 | #plt.show() 154 | img = cv2.imread(pngName,0) 155 | rect_cpy = img.copy() 156 | cv2.imshow('3d-pose-baseline', rect_cpy) 157 | done.append(file_name) 158 | if key == ord('q'): 159 | break 160 | except Exception as e: 161 | print (e) 162 | 163 | sess.close() 164 | 165 | 166 | 167 | if __name__ == "__main__": 168 | 169 | openpose_output_dir = FLAGS.openpose 170 | 171 | level = {0:logging.ERROR, 172 | 1:logging.WARNING, 173 | 2:logging.INFO, 174 | 3:logging.DEBUG} 175 | 176 | logger.setLevel(level[FLAGS.verbose]) 177 | 178 | 179 | tf.app.run() -------------------------------------------------------------------------------- /launch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : start_for_windows_10.py 3 | # Author: bilibili_花花花花花歌 4 | # Date : 2021-08-13 22:13 5 | 6 | # 获取当前位置 7 | import os 8 | from re import T 9 | import sys 10 | import shutil 11 | import msvcrt 12 | 13 | from components.Three_D_PoseBaseline_vmd.api import run as td_vmd_run 14 | from components.FCRN_DepthPrediction_vmd.tensorflow.api import run as fc_vmd_run 15 | from components.Three_D_PoseBaseline_multi.applications.api import run as td_mutil_run 16 | 17 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2' 18 | 19 | # 程序工作目录 20 | work_dir = os.getcwd() 21 | # 项目所在磁盘 22 | project_disk_local = work_dir.split(":")[0].upper() 23 | 24 | 25 | def press_any_key_exit(msg): 26 | print(msg) 27 | while True: 28 | if not msvcrt.getch(): 29 | pass 30 | else: 31 | break 32 | sys.exit() 33 | 34 | 35 | # 检测数据完整性 36 | def check_data(): 37 | # 在运行本程序之前,请先下载data.zip,并解压到项目根目录 38 | if not os.path.isdir(os.path.join(os.getcwd(), "data/")): 39 | print("警告:在运行本程序之前,请先下载data.zip,并解压到项目根目录") 40 | return False 41 | if not os.path.isdir(os.path.join(os.getcwd(), "utils/")): 42 | print("警告:在运行本程序之前,请先下载utils.zip,并解压到项目根目录") 43 | return False 44 | return True 45 | 46 | 47 | # 检测视频路径 48 | def check_video_name(video_path): 49 | # 检测视频文件名称 50 | video_name = video_path.split("\\")[-1].split(".")[0] 51 | if video_name.isdigit() and len(video_name) == 12: 52 | print("> 错误:文件名非法,不可为12位纯数字:" + video_name) 53 | return False 54 | # 检测是否位于不同磁盘 55 | video_disk_local = video_path.split(":")[0].upper() 56 | if video_disk_local != project_disk_local: 57 | print("> 错误:请确保本程序与视频同属于一个磁盘内。") 58 | return False 59 
| return True 60 | 61 | def main(): 62 | warm = input("警告:运行该脚本将删除上一次本视频解析结果,请做好备份。【回车继续】") 63 | video_path = "" 64 | 65 | # 验证输入视频路径合法性 66 | while True: 67 | video_path = input("> 请输入视频路径:") 68 | if len(video_path) == 0: 69 | continue 70 | 71 | # 将视频路径转化为绝对路径 72 | video_path = os.path.abspath(video_path.replace("\"", "")) 73 | 74 | if not check_video_name(video_path): 75 | continue 76 | break 77 | 78 | 79 | is_custom = input("> 设置是否以自定义模式运行(yes/no)【默认no】:") or "no" 80 | custom = False if is_custom == "no" else True 81 | 82 | # 参数设定 83 | if custom: 84 | is_debug = input("> 是否需要显示详细调试信息(yes/no/warn)【默认no】:") or "no" 85 | depthPrediction_vmd_depth_interval = input( 86 | "设置深度间隔。越小,结果越清晰。【默认10】:") or "10" 87 | baseline_multi_center_xy_scale = input("设置X、Y轴贴图比例【默认30】:") or "30" 88 | baseline_multi_center_z_scale = input("设置z轴贴图比例【默认2】:") or "2" 89 | baseline_multi_global_x_angle = input( 90 | "设置三维转换后,X轴上的全局角度/坡度(-180到180)【默认15】:") or "15" 91 | baseline_multi_center_decimation_move = input( 92 | "设置中心采样偏移(center decimation move)【默认0】:") or "0" 93 | baseline_multi_ik_decimation_move = input( 94 | "设置IK采样偏移(IK decimation move)【默认1.5】:") or "1.5" 95 | baseline_multi_decimation_angle = input( 96 | "设置采样角度(decimation angle)(-180 到 180, 整数)【默认10】:") or "10" 97 | baseline_multi_is_alignment = input( 98 | "设置是否对齐(Alignment)(yes/no/warn)【默认yes】:") or "yes" 99 | baseline_multi_is_ik = input( 100 | "设置是否让输出的vmd文件带有IK/FK foot setting(yes/no)【默认yes】:") or "yes" 101 | baseline_multi_heel_position = input( 102 | "设置脚跟在Y轴上的位置,即脚跟和地面之间的距离)【默认0】:") or "0" 103 | else: 104 | is_debug = "no" 105 | depthPrediction_vmd_depth_interval = "10" 106 | baseline_multi_center_xy_scale = "30" 107 | baseline_multi_center_z_scale = "2" 108 | baseline_multi_global_x_angle = "15" 109 | baseline_multi_center_decimation_move = "0" 110 | baseline_multi_ik_decimation_move = "1.5" 111 | baseline_multi_decimation_angle = "10" 112 | baseline_multi_is_alignment = "yes" 113 | baseline_multi_is_ik = "yes" 114 | baseline_multi_heel_position = "0" 115 | 116 | # 设置日志级别 117 | if is_debug == "yes": 118 | log_verbose = "3" 119 | elif is_debug == "warn": 120 | log_verbose = "1" 121 | else: 122 | log_verbose = "2" 123 | 124 | 125 | # 需要解析视频的所在目录 126 | video_dir = os.path.dirname(video_path) 127 | # 结果文件路径 128 | result_path = os.path.join(video_dir, "result") 129 | 130 | # (清空)上次结果 131 | if os.path.exists(result_path): 132 | shutil.rmtree(result_path) 133 | os.makedirs(result_path) 134 | 135 | print("> 开始进行视频姿态识别") 136 | # OpenPose组件相关路径 137 | openpose_path = os.path.join(work_dir, "utils/openpose/bin/OpenPoseDemo.exe") 138 | openpose_write_json_path = os.path.join(video_dir, "_json") 139 | openpose_write_video_path = os.path.join(video_dir, "_openpose.avi") 140 | 141 | # 删除上次结果 142 | if os.path.exists(openpose_write_json_path): 143 | shutil.rmtree(openpose_write_json_path) 144 | os.makedirs(openpose_write_json_path) 145 | 146 | if os.path.exists(openpose_write_video_path): 147 | os.remove(openpose_write_video_path) 148 | 149 | # 运行组件 150 | os.chdir(os.path.join(work_dir, "utils/openpose")) 151 | cmd_part_1 = "{} --model_pose COCO --video {} --write_json {} --write_video {} --number_people_max 1 --net_resolution \"-1x240\"".format(openpose_path, video_path, openpose_write_json_path, openpose_write_video_path) 152 | os.system(cmd_part_1) 153 | os.chdir(work_dir) 154 | 155 | print("> 开始生成3D姿态平面数据") 156 | # 删除上次结果 157 | baseline_vmd_result_name = "" 158 | for file_name in os.listdir(video_dir): 159 | if "_json_3d_" in file_name: 160 | 
baseline_vmd_result_name = file_name 161 | baseline_vmd_result_path = os.path.join(video_dir, baseline_vmd_result_name) 162 | shutil.rmtree(baseline_vmd_result_path) 163 | 164 | # 运行组件 165 | td_vmd_run(dropout=0.5, epochs=200, load=4874200, gif_fps=30, verbose=int(log_verbose), openpose=openpose_write_json_path) 166 | 167 | # 获取结果文件路径 168 | baseline_vmd_result_name = "" 169 | for file_name in os.listdir(video_dir): 170 | if "_json_3d_" in file_name: 171 | baseline_vmd_result_name = file_name 172 | 173 | if baseline_vmd_result_name == "": 174 | print("错误:未找到3d-pose-baseline-vmd组件生成结果,请检查上一步操作。") 175 | press_any_key_exit("按任意键退出...") 176 | else: 177 | baseline_vmd_result_path = os.path.join(video_dir, baseline_vmd_result_name) 178 | 179 | print("> 开始进行姿态数据深度推定") 180 | 181 | # 运行组件 182 | fc_vmd_run(video_path=openpose_write_video_path, baseline_path=baseline_vmd_result_path, interval=int(depthPrediction_vmd_depth_interval), verbose=int(log_verbose)) 183 | 184 | print("> 开始生成vmd动作文件") 185 | 186 | if baseline_multi_is_alignment == "no": 187 | baseline_multi_alignment = "0" 188 | else: 189 | baseline_multi_alignment = "1" 190 | 191 | if baseline_multi_is_ik == "no": 192 | baseline_multi_ik_flag = "0" 193 | else: 194 | baseline_multi_ik_flag = "1" 195 | 196 | # 运行组件 197 | td_mutil_run(target=baseline_vmd_result_path, 198 | verbose=int(log_verbose), 199 | centerxy=int(baseline_multi_center_xy_scale), 200 | centerz=float(baseline_multi_center_z_scale), 201 | xangle=int(baseline_multi_global_x_angle), 202 | ddecimation=int(baseline_multi_decimation_angle), 203 | mdecimation=float(baseline_multi_center_decimation_move), 204 | idecimation=float(baseline_multi_ik_decimation_move), 205 | alignment=int(baseline_multi_alignment), 206 | legik=int(baseline_multi_ik_flag), 207 | heelpos=float(baseline_multi_heel_position)) 208 | 209 | # 获取结果文件路径 210 | result_vmd_name = "" 211 | result_depth_gif_path = "" 212 | result_smooth_gif_path = "" 213 | 214 | for file_name in os.listdir(baseline_vmd_result_path): 215 | if "output_" in file_name: 216 | result_vmd_name = file_name 217 | 218 | if result_vmd_name == "": 219 | print("错误:未找到VMD-3d-pose-baseline-multi组件生成结果,请检查上一步操作。") 220 | press_any_key_exit("按任意键退出...") 221 | else: 222 | result_vmd_path = os.path.join(baseline_vmd_result_path, result_vmd_name) 223 | result_smooth_gif_path = os.path.join(baseline_vmd_result_path, "movie_depth.gif") 224 | result_depth_gif_path = os.path.join(baseline_vmd_result_path, "movie_smoothing.gif") 225 | 226 | # 复制结果文件 227 | shutil.copy(result_vmd_path, result_path) 228 | shutil.copy(result_depth_gif_path, result_path) 229 | shutil.copy(result_smooth_gif_path, result_path) 230 | 231 | os.system("cls") 232 | print("视频解析完成!") 233 | print("动作捕捉结果文件路径:" + result_path) 234 | 235 | os.system("explorer.exe {}".format(result_path)) 236 | 237 | 238 | if __name__ == "__main__": 239 | if check_data(): 240 | main() 241 | 242 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/predict_video.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import numpy as np 4 | import tensorflow as tf 5 | from matplotlib import pyplot as plt 6 | from PIL import Image 7 | import logging 8 | import cv2 9 | import datetime 10 | import os 11 | import re 12 | import shutil 13 | import imageio 14 | import models 15 | 16 | logging.basicConfig(level=logging.INFO) 17 | logger = logging.getLogger(__name__) 18 | 19 | level 
= {0:logging.ERROR, 20 | 1:logging.WARNING, 21 | 2:logging.INFO, 22 | 3:logging.DEBUG} 23 | 24 | def predict_video(model_path, video_path, baseline_path, interval, smoothed_2d): 25 | logger.info("深度推定出力開始") 26 | 27 | # 深度用サブディレクトリ 28 | subdir = '{0}/depth'.format(baseline_path) 29 | if os.path.exists(subdir): 30 | # 既にディレクトリがある場合、一旦削除 31 | shutil.rmtree(subdir) 32 | os.makedirs(subdir) 33 | 34 | #関節位置情報ファイル 35 | depthf = open(baseline_path +'/depth.txt', 'w') 36 | 37 | # Default input size 38 | height = 288 39 | width = 512 40 | channels = 3 41 | batch_size = 1 42 | scale = 0 43 | 44 | # # tensorflowをリセットする 45 | # tf.reset_default_graph() 46 | 47 | # 映像サイズを取得する 48 | n = 0 49 | cap = cv2.VideoCapture(video_path) 50 | while(cap.isOpened()): 51 | orig_width = cap.get(3) # float 52 | orig_height = cap.get(4) # float 53 | logger.debug("width: {0}, height: {1}".format(orig_width, orig_height)) 54 | 55 | # 縮小倍率 56 | scale = width / orig_width 57 | 58 | logger.debug("scale: {0}".format(scale)) 59 | 60 | height = int(orig_height * scale) 61 | 62 | logger.debug("width: {0}, height: {1}".format(width, height)) 63 | 64 | break 65 | 66 | # 再設定したサイズでtensorflow準備 67 | # Create a placeholder for the input image 68 | input_node = tf.placeholder(tf.float32, shape=(None, height, width, channels)) 69 | 70 | # Construct the network 71 | net = models.ResNet50UpProj({'data': input_node}, batch_size, 1, False) 72 | 73 | png_lib = [] 74 | 75 | with tf.Session() as sess: 76 | 77 | # Use to load from ckpt file 78 | saver = tf.train.Saver() 79 | saver.restore(sess, model_path) 80 | 81 | # 動画を1枚ずつ画像に変換する 82 | n = 0 83 | cap = cv2.VideoCapture(video_path) 84 | while(cap.isOpened()): 85 | # 動画から1枚キャプチャして読み込む 86 | flag, frame = cap.read() # Capture frame-by-frame 87 | # キャプチャが終わっていたら終了 88 | if flag == False: # Is a frame left? 
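# Added note: cap.read() returns flag=False once no frames remain, so the
# capture loop exits here and falls through to the movie_depth.gif assembly below.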
89 | break 90 | 91 | if n % interval == 0: 92 | # 先に間引き分同じのを追加 93 | if interval > 1 and n > 0: 94 | for m in range(interval - 1): 95 | # logger.debug("間引き分追加 {0}".format(m)) 96 | png_lib.append(imageio.imread("{0}/depth_{1:012d}.png".format(subdir, n - interval))) 97 | 98 | # 一定間隔フレームおきにキャプチャした画像を深度推定する 99 | logger.info("深度推定: n={0}".format(n)) 100 | 101 | # キャプチャ画像を読み込む 102 | img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 103 | img = img.resize([width,height], Image.ANTIALIAS) 104 | img = np.array(img).astype('float32') 105 | img = np.expand_dims(np.asarray(img), axis = 0) 106 | 107 | # Use to load from npy file 108 | #net.load(model_path, sess) 109 | 110 | # Evalute the network for the given image 111 | pred = sess.run(net.get_output(), feed_dict={input_node: img}) 112 | 113 | # 深度解析後の画像サイズ 114 | pred_height = len(pred[0]) 115 | pred_width = len(pred[0][0]) 116 | 117 | logger.debug("smoothed_2d[n] {0}".format(smoothed_2d[n])) 118 | 119 | # 両足の付け根の中間を取得する 120 | smoothed_center_x = np.average([smoothed_2d[n][0][0], smoothed_2d[n][1][0]]) 121 | smoothed_center_y = np.average([smoothed_2d[n][0][1], smoothed_2d[n][1][1]]) 122 | 123 | logger.debug("smoothed_center_x: {0}, smoothed_center_y: {1}".format(smoothed_center_x, smoothed_center_y)) 124 | 125 | # オリジナルの画像サイズから、縮尺を取得 126 | scale_orig_x = smoothed_center_x / orig_width 127 | scale_orig_y = smoothed_center_y / orig_height 128 | 129 | logger.debug("scale_orig_x: {0}, scale_orig_y: {1}".format(scale_orig_x, scale_orig_y)) 130 | 131 | # 縮尺を展開して、深度解析後の画像サイズに合わせる 132 | pred_x = int(pred_width * scale_orig_x) 133 | pred_y = int(pred_height * scale_orig_y) 134 | 135 | logger.debug("pred_x: {0}, pred_y: {1}, depth: {2}".format(pred_x, pred_y, pred[0][pred_y][pred_x][0])) 136 | 137 | # 深度ファイルに出力 138 | depthf.write("{0}, {1}\n".format(n, pred[0][pred_y][pred_x][0])) 139 | 140 | # Plot result 141 | plt.cla() 142 | plt.clf() 143 | ii = plt.imshow(pred[0,:,:,0], interpolation='nearest') 144 | plt.colorbar(ii) 145 | 146 | # 散布図のようにして、出力に使ったポイントを明示 147 | plt.scatter(pred_x, pred_y, s=5, c="#FFFFFF") 148 | 149 | # 深度画像保存 150 | plotName = "{0}/depth_{1:012d}.png".format(subdir, n) 151 | plt.savefig(plotName) 152 | logger.debug("Save: {0}".format(plotName)) 153 | 154 | # アニメーションGIF用に保持 155 | png_lib.append(imageio.imread(plotName)) 156 | 157 | plt.close() 158 | 159 | n += 1 160 | 161 | logger.info("creating Gif {0}/movie_depth.gif, please Wait!".format(baseline_path)) 162 | imageio.mimsave('{0}/movie_depth.gif'.format(baseline_path), png_lib, fps=30) 163 | 164 | # 終わったら後処理 165 | cap.release() 166 | cv2.destroyAllWindows() 167 | 168 | logger.info("Done!!") 169 | logger.info("深度推定結果: {0}".format(subdir)) 170 | 171 | 172 | def predict(model_data_path, image_path): 173 | # Default input size 174 | height = 228 175 | width = 304 176 | channels = 3 177 | batch_size = 1 178 | 179 | # Read image 180 | img = Image.open(image_path) 181 | img = img.resize([width,height], Image.ANTIALIAS) 182 | img = np.array(img).astype('float32') 183 | img = np.expand_dims(np.asarray(img), axis = 0) 184 | 185 | # Create a placeholder for the input image 186 | input_node = tf.placeholder(tf.float32, shape=(None, height, width, channels)) 187 | 188 | # Construct the network 189 | net = models.ResNet50UpProj({'data': input_node}, batch_size, 1, False) 190 | 191 | with tf.Session() as sess: 192 | 193 | # Load the converted parameters 194 | print('Loading the model') 195 | 196 | # Use to load from ckpt file 197 | saver = tf.train.Saver() 198 | saver.restore(sess, 
model_data_path) 199 | 200 | # Use to load from npy file 201 | #net.load(model_data_path, sess) 202 | 203 | # Evalute the network for the given image 204 | pred = sess.run(net.get_output(), feed_dict={input_node: img}) 205 | 206 | # Plot result 207 | fig = plt.figure() 208 | ii = plt.imshow(pred[0,:,:,0], interpolation='nearest') 209 | fig.colorbar(ii) 210 | plt.show() 211 | 212 | return pred 213 | 214 | 215 | # 関節2次元情報を取得 216 | def load_smoothed_2d(smoothed_file): 217 | smoothed_2d = [] 218 | n = 0 219 | with open(smoothed_file, "r") as sf: 220 | line = sf.readline() # 1行を文字列として読み込む(改行文字も含まれる) 221 | 222 | while line: 223 | # 空白で複数項目に分解 224 | smoothed = re.split("\s+", line) 225 | 226 | # logger.debug(smoothed) 227 | 228 | smoothed_2d.append([ \ 229 | # 右足付け根 230 | [float(smoothed[16]), float(smoothed[17])], \ 231 | # 左足付け根 232 | [float(smoothed[22]), float(smoothed[23])] \ 233 | ]) 234 | 235 | n += 1 236 | 237 | line = sf.readline() 238 | 239 | return smoothed_2d 240 | 241 | def main(): 242 | # Parse arguments 243 | parser = argparse.ArgumentParser() 244 | parser.add_argument('--model_path', dest='model_path', help='Converted parameters for the model', type=str) 245 | parser.add_argument('--video_path', dest='video_path', help='input video', type=str) 246 | parser.add_argument('--baseline_path', dest='baseline_path', help='baseline result path', type=str) 247 | parser.add_argument('--interval', dest='interval', help='interval', type=int) 248 | parser.add_argument('--verbose', dest='verbose', help='verbose', type=int) 249 | args = parser.parse_args() 250 | 251 | logger.setLevel(level[args.verbose]) 252 | 253 | # 関節二次元データを取得 254 | smoothed_2d = load_smoothed_2d("{0}/smoothed.txt".format(args.baseline_path)) 255 | 256 | # 間隔は1以上の整数 257 | interval = args.interval if args.interval > 0 else 1 258 | 259 | # Predict the image 260 | predict_video(args.model_path, args.video_path, args.baseline_path, interval, smoothed_2d) 261 | 262 | if __name__ == '__main__': 263 | main() 264 | 265 | 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/applications/pos2vmd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pos2vmd.py - convert joint position data to VMD 5 | 6 | from __future__ import print_function 7 | 8 | def usage(prog): 9 | print('usage: ' + prog + ' POSITION_FILE VMD_FILE') 10 | sys.exit() 11 | 12 | import re 13 | from PyQt5.QtGui import QQuaternion, QVector3D 14 | from VmdWriter import VmdBoneFrame, VmdInfoIk, VmdShowIkFrame, VmdWriter 15 | 16 | def positions_to_frames(pos, head_rotation=None): 17 | """convert positions to bone frames""" 18 | frames = [] 19 | # 上半身 20 | bf = VmdBoneFrame() 21 | bf.name = b'\x8f\xe3\x94\xbc\x90\x67' # '上半身' 22 | direction = pos[8] - pos[7] 23 | up = QVector3D.crossProduct(direction, (pos[14] - pos[11])).normalized() 24 | upper_body_orientation = QQuaternion.fromDirection(direction, up) 25 | initial = QQuaternion.fromDirection(QVector3D(0, 1, 0), QVector3D(0, 0, 1)) 26 | bf.rotation = upper_body_orientation * initial.inverted() 27 | frames.append(bf) 28 | upper_body_rotation = bf.rotation 29 | 30 | # 下半身 31 | bf = VmdBoneFrame() 32 | bf.name = b'\x89\xba\x94\xbc\x90\x67' # '下半身' 33 | direction = pos[0] - pos[7] 34 | up = QVector3D.crossProduct(direction, (pos[4] - pos[1])) 35 | lower_body_orientation = QQuaternion.fromDirection(direction, up) 36 | initial = 
QQuaternion.fromDirection(QVector3D(0, -1, 0), QVector3D(0, 0, 1)) 37 | bf.rotation = lower_body_orientation * initial.inverted() 38 | lower_body_rotation = bf.rotation 39 | frames.append(bf) 40 | 41 | # 首は回転させず、頭のみ回転させる 42 | # 頭 43 | bf = VmdBoneFrame() 44 | bf.name = b'\x93\xaa' # '頭' 45 | if head_rotation is None: 46 | # direction = pos[10] - pos[9] 47 | direction = pos[10] - pos[8] 48 | up = QVector3D.crossProduct((pos[9] - pos[8]), (pos[10] - pos[9])) 49 | orientation = QQuaternion.fromDirection(direction, up) 50 | initial_orientation = QQuaternion.fromDirection(QVector3D(0, 1, 0), QVector3D(1, 0, 0)) 51 | rotation = orientation * initial_orientation.inverted() 52 | bf.rotation = upper_body_rotation.inverted() * rotation 53 | else: 54 | bf.rotation = upper_body_rotation.inverted() * head_rotation 55 | frames.append(bf) 56 | 57 | # 左腕 58 | bf = VmdBoneFrame() 59 | bf.name = b'\x8d\xb6\x98\x72' # '左腕' 60 | direction = pos[12] - pos[11] 61 | up = QVector3D.crossProduct((pos[12] - pos[11]), (pos[13] - pos[12])) 62 | orientation = QQuaternion.fromDirection(direction, up) 63 | initial_orientation = QQuaternion.fromDirection(QVector3D(1.73, -1, 0), QVector3D(1, 1.73, 0)) 64 | rotation = orientation * initial_orientation.inverted() 65 | # 左腕ポーンの回転から親ボーンの回転を差し引いてbf.rotationに格納する。 66 | # upper_body_rotation * bf.rotation = rotation なので、 67 | bf.rotation = upper_body_rotation.inverted() * rotation 68 | left_arm_rotation = bf.rotation # 後で使うので保存しておく 69 | frames.append(bf) 70 | 71 | # 左ひじ 72 | bf = VmdBoneFrame() 73 | bf.name = b'\x8d\xb6\x82\xd0\x82\xb6' # '左ひじ' 74 | direction = pos[13] - pos[12] 75 | up = QVector3D.crossProduct((pos[12] - pos[11]), (pos[13] - pos[12])) 76 | orientation = QQuaternion.fromDirection(direction, up) 77 | initial_orientation = QQuaternion.fromDirection(QVector3D(1.73, -1, 0), QVector3D(1, 1.73, 0)) 78 | rotation = orientation * initial_orientation.inverted() 79 | # 左ひじポーンの回転から親ボーンの回転を差し引いてbf.rotationに格納する。 80 | # upper_body_rotation * left_arm_rotation * bf.rotation = rotation なので、 81 | bf.rotation = left_arm_rotation.inverted() * upper_body_rotation.inverted() * rotation 82 | # bf.rotation = (upper_body_rotation * left_arm_rotation).inverted() * rotation # 別の表現 83 | frames.append(bf) 84 | 85 | 86 | # 右腕 87 | bf = VmdBoneFrame() 88 | bf.name = b'\x89\x45\x98\x72' # '右腕' 89 | direction = pos[15] - pos[14] 90 | up = QVector3D.crossProduct((pos[15] - pos[14]), (pos[16] - pos[15])) 91 | orientation = QQuaternion.fromDirection(direction, up) 92 | initial_orientation = QQuaternion.fromDirection(QVector3D(-1.73, -1, 0), QVector3D(1, -1.73, 0)) 93 | rotation = orientation * initial_orientation.inverted() 94 | bf.rotation = upper_body_rotation.inverted() * rotation 95 | right_arm_rotation = bf.rotation 96 | frames.append(bf) 97 | 98 | # 右ひじ 99 | bf = VmdBoneFrame() 100 | bf.name = b'\x89\x45\x82\xd0\x82\xb6' # '右ひじ' 101 | direction = pos[16] - pos[15] 102 | up = QVector3D.crossProduct((pos[15] - pos[14]), (pos[16] - pos[15])) 103 | orientation = QQuaternion.fromDirection(direction, up) 104 | initial_orientation = QQuaternion.fromDirection(QVector3D(-1.73, -1, 0), QVector3D(1, -1.73, 0)) 105 | rotation = orientation * initial_orientation.inverted() 106 | bf.rotation = right_arm_rotation.inverted() * upper_body_rotation.inverted() * rotation 107 | frames.append(bf) 108 | 109 | # 左足 110 | bf = VmdBoneFrame() 111 | bf.name = b'\x8d\xb6\x91\xab' # '左足' 112 | direction = pos[5] - pos[4] 113 | up = QVector3D.crossProduct((pos[5] - pos[4]), (pos[6] - pos[5])) 114 | orientation = 
QQuaternion.fromDirection(direction, up) 115 | initial_orientation = QQuaternion.fromDirection(QVector3D(0, -1, 0), QVector3D(-1, 0, 0)) 116 | rotation = orientation * initial_orientation.inverted() 117 | bf.rotation = lower_body_rotation.inverted() * rotation 118 | left_leg_rotation = bf.rotation 119 | frames.append(bf) 120 | 121 | # 左ひざ 122 | bf = VmdBoneFrame() 123 | bf.name = b'\x8d\xb6\x82\xd0\x82\xb4' # '左ひざ' 124 | direction = pos[6] - pos[5] 125 | up = QVector3D.crossProduct((pos[5] - pos[4]), (pos[6] - pos[5])) 126 | orientation = QQuaternion.fromDirection(direction, up) 127 | initial_orientation = QQuaternion.fromDirection(QVector3D(0, -1, 0), QVector3D(-1, 0, 0)) 128 | rotation = orientation * initial_orientation.inverted() 129 | bf.rotation = left_leg_rotation.inverted() * lower_body_rotation.inverted() * rotation 130 | frames.append(bf) 131 | 132 | # 右足 133 | bf = VmdBoneFrame() 134 | bf.name = b'\x89\x45\x91\xab' # '右足' 135 | direction = pos[2] - pos[1] 136 | up = QVector3D.crossProduct((pos[2] - pos[1]), (pos[3] - pos[2])) 137 | orientation = QQuaternion.fromDirection(direction, up) 138 | initial_orientation = QQuaternion.fromDirection(QVector3D(0, -1, 0), QVector3D(-1, 0, 0)) 139 | rotation = orientation * initial_orientation.inverted() 140 | bf.rotation = lower_body_rotation.inverted() * rotation 141 | right_leg_rotation = bf.rotation 142 | frames.append(bf) 143 | 144 | # 右ひざ 145 | bf = VmdBoneFrame() 146 | bf.name = b'\x89\x45\x82\xd0\x82\xb4' # '右ひざ' 147 | direction = pos[3] - pos[2] 148 | up = QVector3D.crossProduct((pos[2] - pos[1]), (pos[3] - pos[2])) 149 | orientation = QQuaternion.fromDirection(direction, up) 150 | initial_orientation = QQuaternion.fromDirection(QVector3D(0, -1, 0), QVector3D(-1, 0, 0)) 151 | rotation = orientation * initial_orientation.inverted() 152 | bf.rotation = right_leg_rotation.inverted() * lower_body_rotation.inverted() * rotation 153 | frames.append(bf) 154 | 155 | return frames 156 | 157 | def make_showik_frames(): 158 | frames = [] 159 | sf = VmdShowIkFrame() 160 | sf.show = 1 161 | sf.ik.append(VmdInfoIk(b'\x8d\xb6\x91\xab\x82\x68\x82\x6a', 0)) # '左足IK' 162 | sf.ik.append(VmdInfoIk(b'\x89\x45\x91\xab\x82\x68\x82\x6a', 0)) # '右足IK' 163 | sf.ik.append(VmdInfoIk(b'\x8d\xb6\x82\xc2\x82\xdc\x90\xe6\x82\x68\x82\x6a', 0)) # '左つま先IK' 164 | sf.ik.append(VmdInfoIk(b'\x89\x45\x82\xc2\x82\xdc\x90\xe6\x82\x68\x82\x6a', 0)) # '右つま先IK' 165 | frames.append(sf) 166 | return frames 167 | 168 | def read_positions(position_file): 169 | """Read joint position data""" 170 | f = open(position_file, "r") 171 | 172 | positions = [] 173 | while True: 174 | line = f.readline() 175 | if not line: 176 | break 177 | line = line.rstrip('\r\n') 178 | a = re.split(' ', line) 179 | # 元データはz軸が垂直上向き。MMDに合わせるためにyとzを入れ替える。 180 | q = QVector3D(float(a[1]), float(a[3]), float(a[2])) # a[0]: index 181 | positions.append(q) # a[0]: index 182 | f.close() 183 | return positions 184 | 185 | def convert_position(pose_3d): 186 | positions = [] 187 | for pose in pose_3d: 188 | for j in range(pose.shape[1]): 189 | q = QVector3D(pose[0, j], pose[2, j], pose[1, j]) 190 | positions.append(q) 191 | return positions 192 | 193 | def position_list_to_vmd(positions, vmd_file, head_rotation=None, expression_frames=None): 194 | bone_frames = [] 195 | bf = positions_to_frames(positions, head_rotation) 196 | bone_frames.extend(bf) 197 | showik_frames = make_showik_frames() 198 | writer = VmdWriter() 199 | # writer.write_vmd_file(vmd_file, bone_frames, showik_frames, expression_frames) 200 | 
writer.write_vmd_file(vmd_file, bone_frames, showik_frames) 201 | 202 | def pos2vmd(pose_3d, vmd_file, head_rotation=None, expression_frames=None): 203 | positions = convert_position(pose_3d) 204 | position_list_to_vmd(positions, vmd_file, head_rotation, expression_frames) 205 | 206 | def position_file_to_vmd(position_file, vmd_file): 207 | positions = read_positions(position_file) 208 | position_list_to_vmd(positions, vmd_file) 209 | 210 | if __name__ == '__main__': 211 | import sys 212 | if (len(sys.argv) < 3): 213 | usage(sys.argv[0]) 214 | 215 | position_file = sys.argv[1] 216 | vmd_file = sys.argv[2] 217 | 218 | position_file_to_vmd(position_file, vmd_file) 219 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/api.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import re 4 | import cv2 5 | 6 | import shutil 7 | import logging 8 | import imageio 9 | import datetime 10 | import argparse 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | 15 | from PIL import Image 16 | from matplotlib import pyplot as plt 17 | 18 | from . import models 19 | from . import com_path 20 | 21 | logging.basicConfig(level=logging.INFO) 22 | logger = logging.getLogger(__name__) 23 | 24 | level = {0:logging.ERROR, 25 | 1:logging.WARNING, 26 | 2:logging.INFO, 27 | 3:logging.DEBUG} 28 | 29 | def predict_video(model_path, video_path, baseline_path, interval, smoothed_2d): 30 | logger.info("深度推定出力開始") 31 | 32 | # 深度用サブディレクトリ 33 | subdir = '{0}/depth'.format(baseline_path) 34 | if os.path.exists(subdir): 35 | # 既にディレクトリがある場合、一旦削除 36 | shutil.rmtree(subdir) 37 | os.makedirs(subdir) 38 | 39 | #関節位置情報ファイル 40 | depthf = open(baseline_path +'/depth.txt', 'w') 41 | 42 | # Default input size 43 | height = 288 44 | width = 512 45 | channels = 3 46 | batch_size = 1 47 | scale = 0 48 | 49 | # # tensorflowをリセットする 50 | tf.reset_default_graph() 51 | 52 | # 映像サイズを取得する 53 | n = 0 54 | cap = cv2.VideoCapture(video_path) 55 | while(cap.isOpened()): 56 | orig_width = cap.get(3) # float 57 | orig_height = cap.get(4) # float 58 | logger.debug("width: {0}, height: {1}".format(orig_width, orig_height)) 59 | 60 | # 縮小倍率 61 | scale = width / orig_width 62 | 63 | logger.debug("scale: {0}".format(scale)) 64 | 65 | height = int(orig_height * scale) 66 | 67 | logger.debug("width: {0}, height: {1}".format(width, height)) 68 | 69 | break 70 | 71 | # 再設定したサイズでtensorflow準備 72 | # Create a placeholder for the input image 73 | input_node = tf.placeholder(tf.float32, shape=(None, height, width, channels)) 74 | 75 | # Construct the network 76 | net = models.ResNet50UpProj({'data': input_node}, batch_size, 1, False) 77 | 78 | png_lib = [] 79 | 80 | with tf.Session() as sess: 81 | 82 | # Use to load from ckpt file 83 | saver = tf.train.Saver() 84 | saver.restore(sess, model_path) 85 | 86 | # 動画を1枚ずつ画像に変換する 87 | n = 0 88 | cap = cv2.VideoCapture(video_path) 89 | while(cap.isOpened()): 90 | # 動画から1枚キャプチャして読み込む 91 | flag, frame = cap.read() # Capture frame-by-frame 92 | # キャプチャが終わっていたら終了 93 | if flag == False: # Is a frame left? 
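# Added note: same end-of-stream guard as predict_video.py. In the block below,
# frames skipped by `interval` re-append the previous depth PNG, so movie_depth.gif
# keeps the source frame pacing while only every interval-th frame is actually predicted.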
94 | break 95 | 96 | if n % interval == 0: 97 | # 先に間引き分同じのを追加 98 | if interval > 1 and n > 0: 99 | for m in range(interval - 1): 100 | # logger.debug("間引き分追加 {0}".format(m)) 101 | png_lib.append(imageio.imread("{0}/depth_{1:012d}.png".format(subdir, n - interval))) 102 | 103 | # 一定間隔フレームおきにキャプチャした画像を深度推定する 104 | # logger.info("深度推定: n={0}".format(n)) 105 | print("\r", "深度推定: n={0}".format(n), end = '' ) 106 | 107 | # キャプチャ画像を読み込む 108 | img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 109 | img = img.resize([width,height], Image.ANTIALIAS) 110 | img = np.array(img).astype('float32') 111 | img = np.expand_dims(np.asarray(img), axis = 0) 112 | 113 | # Use to load from npy file 114 | #net.load(model_path, sess) 115 | 116 | # Evalute the network for the given image 117 | pred = sess.run(net.get_output(), feed_dict={input_node: img}) 118 | 119 | # 深度解析後の画像サイズ 120 | pred_height = len(pred[0]) 121 | pred_width = len(pred[0][0]) 122 | 123 | logger.debug("smoothed_2d[n] {0}".format(smoothed_2d[n])) 124 | 125 | # 両足の付け根の中間を取得する 126 | smoothed_center_x = np.average([smoothed_2d[n][0][0], smoothed_2d[n][1][0]]) 127 | smoothed_center_y = np.average([smoothed_2d[n][0][1], smoothed_2d[n][1][1]]) 128 | 129 | logger.debug("smoothed_center_x: {0}, smoothed_center_y: {1}".format(smoothed_center_x, smoothed_center_y)) 130 | 131 | # オリジナルの画像サイズから、縮尺を取得 132 | scale_orig_x = smoothed_center_x / orig_width 133 | scale_orig_y = smoothed_center_y / orig_height 134 | 135 | logger.debug("scale_orig_x: {0}, scale_orig_y: {1}".format(scale_orig_x, scale_orig_y)) 136 | 137 | # 縮尺を展開して、深度解析後の画像サイズに合わせる 138 | pred_x = int(pred_width * scale_orig_x) 139 | pred_y = int(pred_height * scale_orig_y) 140 | 141 | logger.debug("pred_x: {0}, pred_y: {1}, depth: {2}".format(pred_x, pred_y, pred[0][pred_y][pred_x][0])) 142 | 143 | # 深度ファイルに出力 144 | depthf.write("{0}, {1}\n".format(n, pred[0][pred_y][pred_x][0])) 145 | 146 | # Plot result 147 | plt.cla() 148 | plt.clf() 149 | ii = plt.imshow(pred[0,:,:,0], interpolation='nearest') 150 | plt.colorbar(ii) 151 | 152 | # 散布図のようにして、出力に使ったポイントを明示 153 | plt.scatter(pred_x, pred_y, s=5, c="#FFFFFF") 154 | 155 | # 深度画像保存 156 | plotName = "{0}/depth_{1:012d}.png".format(subdir, n) 157 | plt.savefig(plotName) 158 | logger.debug("Save: {0}".format(plotName)) 159 | 160 | # アニメーションGIF用に保持 161 | png_lib.append(imageio.imread(plotName)) 162 | 163 | plt.close() 164 | 165 | n += 1 166 | 167 | print("深度推定完成。") 168 | 169 | logger.info("creating Gif {0}/movie_depth.gif, please Wait!".format(baseline_path)) 170 | imageio.mimsave('{0}/movie_depth.gif'.format(baseline_path), png_lib, fps=30) 171 | 172 | # 終わったら後処理 173 | cap.release() 174 | cv2.destroyAllWindows() 175 | 176 | logger.info("Done!!深度推定結果: {0}".format(subdir)) 177 | 178 | 179 | def predict(model_data_path, image_path): 180 | # Default input size 181 | height = 228 182 | width = 304 183 | channels = 3 184 | batch_size = 1 185 | 186 | # Read image 187 | img = Image.open(image_path) 188 | img = img.resize([width,height], Image.ANTIALIAS) 189 | img = np.array(img).astype('float32') 190 | img = np.expand_dims(np.asarray(img), axis = 0) 191 | 192 | # Create a placeholder for the input image 193 | input_node = tf.placeholder(tf.float32, shape=(None, height, width, channels)) 194 | 195 | # Construct the network 196 | net = models.ResNet50UpProj({'data': input_node}, batch_size, 1, False) 197 | 198 | with tf.Session() as sess: 199 | 200 | # Load the converted parameters 201 | print('Loading the model') 202 | 203 | # Use to load from ckpt 
file 204 | saver = tf.train.Saver() 205 | saver.restore(sess, model_data_path) 206 | 207 | # Use to load from npy file 208 | #net.load(model_data_path, sess) 209 | 210 | # Evalute the network for the given image 211 | pred = sess.run(net.get_output(), feed_dict={input_node: img}) 212 | 213 | # Plot result 214 | fig = plt.figure() 215 | ii = plt.imshow(pred[0,:,:,0], interpolation='nearest') 216 | fig.colorbar(ii) 217 | plt.show() 218 | 219 | return pred 220 | 221 | 222 | # 関節2次元情報を取得 223 | def load_smoothed_2d(smoothed_file): 224 | smoothed_2d = [] 225 | n = 0 226 | with open(smoothed_file, "r") as sf: 227 | line = sf.readline() # 1行を文字列として読み込む(改行文字も含まれる) 228 | 229 | while line: 230 | # 空白で複数項目に分解 231 | smoothed = re.split("\s+", line) 232 | 233 | # logger.debug(smoothed) 234 | 235 | smoothed_2d.append([ \ 236 | # 右足付け根 237 | [float(smoothed[16]), float(smoothed[17])], \ 238 | # 左足付け根 239 | [float(smoothed[22]), float(smoothed[23])] \ 240 | ]) 241 | 242 | n += 1 243 | 244 | line = sf.readline() 245 | 246 | return smoothed_2d 247 | 248 | 249 | def run(video_path, baseline_path, interval, verbose): 250 | logger.setLevel(level[verbose]) 251 | # 関節二次元データを取得 252 | smoothed_2d = load_smoothed_2d("{0}/smoothed.txt".format(baseline_path)) 253 | # 間隔は1以上の整数 254 | interval = interval if interval > 0 else 1 255 | # Predict the image 256 | model_path = os.path.join(os.getcwd(), "data/NYU_FCRN.ckpt") 257 | predict_video(model_path, video_path, baseline_path, interval, smoothed_2d) 258 | 259 | if __name__ == '__main__': 260 | # Parse arguments 261 | parser = argparse.ArgumentParser() 262 | parser.add_argument('--model_path', dest='model_path', help='Converted parameters for the model', type=str) 263 | parser.add_argument('--video_path', dest='video_path', help='input video', type=str) 264 | parser.add_argument('--baseline_path', dest='baseline_path', help='baseline result path', type=str) 265 | parser.add_argument('--interval', dest='interval', help='interval', type=int) 266 | parser.add_argument('--verbose', dest='verbose', help='verbose', type=int) 267 | args = parser.parse_args() 268 | 269 | logger.setLevel(level[args.verbose]) 270 | # 関節二次元データを取得 271 | smoothed_2d = load_smoothed_2d("{0}/smoothed.txt".format(args.baseline_path)) 272 | # 間隔は1以上の整数 273 | interval = args.interval if args.interval > 0 else 1 274 | # Predict the image 275 | predict_video(args.model_path, args.video_path, args.baseline_path, interval, smoothed_2d) 276 | 277 | 278 | 279 | 280 | 281 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/packages/lifting/utils/prob_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Apr 21 13:53 2017 4 | 5 | @author: Denis Tome' 6 | """ 7 | import os 8 | import scipy.io as sio 9 | import numpy as np 10 | from lifting.utils.upright_fast import pick_e 11 | from lifting.utils import config 12 | 13 | __all__ = ['Prob3dPose'] 14 | 15 | 16 | class Prob3dPose: 17 | 18 | def __init__(self, prob_model_path): 19 | model_param = sio.loadmat(prob_model_path) 20 | self.mu = np.reshape( 21 | model_param['mu'], (model_param['mu'].shape[0], 3, -1)) 22 | self.e = np.reshape(model_param['e'], (model_param['e'].shape[ 23 | 0], model_param['e'].shape[1], 3, -1)) 24 | self.sigma = model_param['sigma'] 25 | self.cam = np.array( 26 | [[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]]) 27 | 28 | @staticmethod 29 | def cost3d(model, gt): 30 | """3d error in 
mm""" 31 | out = np.sqrt(((gt - model) ** 2).sum(1)).mean(-1) 32 | return out 33 | 34 | @staticmethod 35 | def renorm_gt(gt): 36 | """Compel gt data to have mean joint length of one""" 37 | _POSE_TREE = np.asarray([ 38 | [0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8], 39 | [8, 9], [9, 10], [8, 11], [11, 12], [12, 13], [8, 14], [14, 15], 40 | [15, 16]]).T 41 | scale = np.sqrt(((gt[:, :, _POSE_TREE[0]] - 42 | gt[:, :, _POSE_TREE[1]]) ** 2).sum(2).sum(1)) 43 | return gt / scale[:, np.newaxis, np.newaxis] 44 | 45 | @staticmethod 46 | def build_model(a, e, s0): 47 | """Build 3D model""" 48 | assert (s0.shape[1] == 3) 49 | assert (e.shape[2] == 3) 50 | assert (a.shape[1] == e.shape[1]) 51 | out = np.einsum('...i,...ijk', a, e) 52 | out += s0 53 | return out 54 | 55 | @staticmethod 56 | def build_and_rot_model(a, e, s0, r): 57 | """ 58 | Build model and rotate according to the identified rotation matrix 59 | """ 60 | from numpy.core.umath_tests import matrix_multiply 61 | 62 | r2 = Prob3dPose.upgrade_r(r.T).transpose((0, 2, 1)) 63 | mod = Prob3dPose.build_model(a, e, s0) 64 | mod = matrix_multiply(r2, mod) 65 | return mod 66 | 67 | @staticmethod 68 | def upgrade_r(r): 69 | """ 70 | Upgrades complex parameterisation of planar rotation to tensor 71 | containing per frame 3x3 rotation matrices 72 | """ 73 | assert (r.ndim == 2) 74 | # Technically optional assert, but if this fails data is probably 75 | # transposed 76 | assert (r.shape[1] == 2) 77 | assert (np.all(np.isfinite(r))) 78 | norm = np.sqrt((r[:, :2] ** 2).sum(1)) 79 | assert (np.all(norm > 0)) 80 | r /= norm[:, np.newaxis] 81 | assert (np.all(np.isfinite(r))) 82 | newr = np.zeros((r.shape[0], 3, 3)) 83 | newr[:, :2, 0] = r[:, :2] 84 | newr[:, 2, 2] = 1 85 | newr[:, 1::-1, 1] = r[:, :2] 86 | newr[:, 0, 1] *= -1 87 | return newr 88 | 89 | @staticmethod 90 | def centre(data_2d): 91 | """center data according to each of the coordiante components""" 92 | return (data_2d.T - data_2d.mean(1)).T 93 | 94 | @staticmethod 95 | def centre_all(data): 96 | """center all data""" 97 | if data.ndim == 2: 98 | return Prob3dPose.centre(data) 99 | return (data.transpose(2, 0, 1) - data.mean(2)).transpose(1, 2, 0) 100 | 101 | @staticmethod 102 | def normalise_data(d2, weights): 103 | """Normalise data according to height""" 104 | 105 | # the joints with weight set to 0 should not be considered in the 106 | # normalisation process 107 | d2 = d2.reshape(d2.shape[0], -1, 2).transpose(0, 2, 1) 108 | idx_consider = weights[0, 0].astype(np.bool) 109 | if np.sum(weights[:, 0].sum(1) >= config.MIN_NUM_JOINTS) == 0: 110 | raise Exception( 111 | 'Not enough 2D joints identified to generate 3D pose') 112 | d2[:, :, idx_consider] = Prob3dPose.centre_all(d2[:, :, idx_consider]) 113 | 114 | # Height normalisation (2 meters) 115 | m2 = d2[:, 1, idx_consider].min(1) / 2.0 116 | m2 -= d2[:, 1, idx_consider].max(1) / 2.0 117 | crap = m2 == 0 118 | m2[crap] = 1.0 119 | d2[:, :, idx_consider] /= m2[:, np.newaxis, np.newaxis] 120 | return d2, m2 121 | 122 | @staticmethod 123 | def transform_joints(pose_2d, visible_joints): 124 | """ 125 | Transform the set of joints according to what the probabilistic model 126 | expects as input. 127 | 128 | It returns the new set of joints of each of the people and the set of 129 | weights for the joints. 
130 | """ 131 | 132 | _H36M_ORDER = [8, 9, 10, 11, 12, 13, 1, 0, 5, 6, 7, 2, 3, 4] 133 | _W_POS = [1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16] 134 | 135 | def swap_xy(poses): 136 | tmp = np.copy(poses[:, :, 0]) 137 | poses[:, :, 0] = poses[:, :, 1] 138 | poses[:, :, 1] = tmp 139 | return poses 140 | 141 | assert (pose_2d.ndim == 3) 142 | new_pose = pose_2d.copy() 143 | new_pose = swap_xy(new_pose) 144 | new_pose = new_pose[:, _H36M_ORDER] 145 | 146 | # defining weights according to occlusions 147 | weights = np.zeros((pose_2d.shape[0], 2, config.H36M_NUM_JOINTS)) 148 | ordered_visibility = np.repeat( 149 | visible_joints[:, _H36M_ORDER, np.newaxis], 2, 2 150 | ).transpose([0, 2, 1]) 151 | weights[:, :, _W_POS] = ordered_visibility 152 | return new_pose, weights 153 | 154 | def affine_estimate(self, w, depth_reg=0.085, weights=None, scale=10.0, 155 | scale_mean=0.0016 * 1.8 * 1.2, scale_std=1.2 * 0, 156 | cap_scale=-0.00129): 157 | """ 158 | Quick switch to allow reconstruction at unknown scale returns a,r 159 | and scale 160 | """ 161 | weights = np.zeros((0, 0, 0)) if weights is None else weights 162 | 163 | s = np.empty((self.sigma.shape[0], self.sigma.shape[1] + 4)) # e,y,x,z 164 | s[:, :4] = 10 ** -5 # Tiny but makes stuff well-posed 165 | s[:, 0] = scale_std 166 | s[:, 4:] = self.sigma 167 | s[:, 4:-1] *= scale 168 | 169 | e2 = np.zeros((self.e.shape[0], self.e.shape[ 170 | 1] + 4, 3, self.e.shape[3])) 171 | e2[:, 1, 0] = 1.0 172 | e2[:, 2, 1] = 1.0 173 | e2[:, 3, 0] = 1.0 174 | # This makes the least_squares problem ill posed, as X,Z are 175 | # interchangable 176 | # Hence regularisation above to speed convergence and stop blow-up 177 | e2[:, 0] = self.mu 178 | e2[:, 4:] = self.e 179 | t_m = np.zeros_like(self.mu) 180 | 181 | res, a, r = pick_e(w, e2, t_m, self.cam, s, weights=weights, 182 | interval=0.01, depth_reg=depth_reg, 183 | scale_prior=scale_mean) 184 | 185 | scale = a[:, :, 0] 186 | reestimate = scale > cap_scale 187 | m = self.mu * cap_scale 188 | for i in range(scale.shape[0]): 189 | if reestimate[i].sum() > 0: 190 | ehat = e2[i:i + 1, 1:] 191 | mhat = m[i:i + 1] 192 | shat = s[i:i + 1, 1:] 193 | (res2, a2, r2) = pick_e( 194 | w[reestimate[i]], ehat, mhat, self.cam, shat, 195 | weights=weights[reestimate[i]], 196 | interval=0.01, depth_reg=depth_reg, 197 | scale_prior=scale_mean 198 | ) 199 | res[i:i + 1, reestimate[i]] = res2 200 | a[i:i + 1, reestimate[i], 1:] = a2 201 | a[i:i + 1, reestimate[i], 0] = cap_scale 202 | r[i:i + 1, :, reestimate[i]] = r2 203 | scale = a[:, :, 0] 204 | a = a[:, :, 1:] / a[:, :, 0][:, :, np.newaxis] 205 | return res, e2[:, 1:], a, r, scale 206 | 207 | def better_rec(self, w, model, s=1, weights=1, damp_z=1): 208 | """Quick switch to allow reconstruction at unknown scale 209 | returns a,r and scale""" 210 | from numpy.core.umath_tests import matrix_multiply 211 | proj = matrix_multiply(self.cam[np.newaxis], model) 212 | proj[:, :2] = (proj[:, :2] * s + w * weights) / (s + weights) 213 | proj[:, 2] *= damp_z 214 | out = matrix_multiply(self.cam.T[np.newaxis], proj) 215 | return out 216 | 217 | def create_rec(self, w2, weights, res_weight=1): 218 | """Reconstruct 3D pose given a 2D pose""" 219 | _SIGMA_SCALING = 5.2 220 | 221 | res, e, a, r, scale = self.affine_estimate( 222 | w2, scale=_SIGMA_SCALING, weights=weights, 223 | depth_reg=0, cap_scale=-0.001, scale_mean=-0.003 224 | ) 225 | 226 | remaining_dims = 3 * w2.shape[2] - e.shape[1] 227 | assert (remaining_dims >= 0) 228 | llambda = -np.log(self.sigma) 229 | lgdet = np.sum(llambda[:, 
:-1], 1) + llambda[:, -1] * remaining_dims 230 | score = (res * res_weight + lgdet[:, np.newaxis] * (scale ** 2)) 231 | best = np.argmin(score, 0) 232 | index = np.arange(best.shape[0]) 233 | a2 = a[best, index] 234 | r2 = r[best, :, index].T 235 | rec = Prob3dPose.build_and_rot_model(a2, e[best], self.mu[best], r2) 236 | rec *= -np.abs(scale[best, index])[:, np.newaxis, np.newaxis] 237 | 238 | rec = self.better_rec(w2, rec, 1, 1.55 * weights, 1) * -1 239 | rec = Prob3dPose.renorm_gt(rec) 240 | rec *= 0.97 241 | return rec 242 | 243 | def compute_3d(self, pose_2d, weights): 244 | """Reconstruct 3D poses given 2D estimations""" 245 | 246 | _J_POS = [1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16] 247 | _SCALE_3D = 1174.88312988 248 | 249 | if pose_2d.shape[1] != config.H36M_NUM_JOINTS: 250 | # need to call the linear regressor 251 | reg_joints = np.zeros( 252 | (pose_2d.shape[0], config.H36M_NUM_JOINTS, 2)) 253 | for oid, singe_pose in enumerate(pose_2d): 254 | reg_joints[oid, _J_POS] = singe_pose 255 | 256 | norm_pose, _ = Prob3dPose.normalise_data(reg_joints, weights) 257 | else: 258 | norm_pose, _ = Prob3dPose.normalise_data(pose_2d, weights) 259 | 260 | pose_3d = self.create_rec(norm_pose, weights) * _SCALE_3D 261 | return pose_3d 262 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/packages/lifting/utils/upright_fast.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on May 22 17:10 2017 4 | 5 | @author: Denis Tome' 6 | """ 7 | import numpy as np 8 | import scipy 9 | 10 | __all__ = [ 11 | 'upgrade_r', 12 | 'update_cam', 13 | 'estimate_a_and_r_with_res', 14 | 'estimate_a_and_r_with_res_weights', 15 | 'pick_e' 16 | ] 17 | 18 | 19 | def upgrade_r(r): 20 | """Upgrades complex parameterisation of planar rotation to tensor containing 21 | per frame 3x3 rotation matrices""" 22 | newr = np.zeros((3, 3)) 23 | newr[:2, 0] = r 24 | newr[2, 2] = 1 25 | newr[1::-1, 1] = r 26 | newr[0, 1] *= -1 27 | return newr 28 | 29 | 30 | def update_cam(cam): 31 | new_cam = cam[[0, 2, 1]].copy() 32 | new_cam = new_cam[:, [0, 2, 1]] 33 | return new_cam 34 | 35 | 36 | def estimate_a_and_r_with_res( 37 | w, e, s0, camera_r, Lambda, check, a, weights, res, proj_e, 38 | residue, Ps, depth_reg, scale_prior): 39 | """ 40 | TODO: Missing the following parameters in docstring: 41 | - w, e, s0, camera_r, Lambda, check, a, res, proj_e, depth_reg, 42 | scale_prior 43 | 44 | TODO: The following parameters are not used: 45 | - s0, weights 46 | 47 | So local optima are a problem in general. 48 | However: 49 | 50 | 1. This problem is convex in a but not in r, and 51 | 52 | 2. each frame can be solved independently. 53 | 54 | So for each frame, we can do a grid search in r and take the globally 55 | optimal solution. 56 | 57 | In practice, we just brute force over 100 different estimates of r, and 58 | take the best pair (r,a*(r)) where a*(r) is the optimal minimiser of a 59 | given r. 
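    (Concretely, the candidate rotations are the angles in `check`; for each
    candidate the optimal a comes from one scipy.linalg.lstsq solve of the
    assembled projection system, and the per-frame minimum residual then selects
    the winning angle.)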
60 | 61 | Arguments: 62 | 63 | w is a 3d measurement matrix of form frames*2*points 64 | 65 | e is a 3d set of basis vectors of form basis*3*points 66 | 67 | s0 is the 3d rest shape of form 3*points 68 | 69 | Lambda are the regularisor coefficients on the coefficients of the 70 | weights typically generated using PPCA 71 | 72 | interval is how far round the circle we should check for break points 73 | we check every interval*2*pi radians 74 | 75 | Returns: 76 | 77 | a (basis coefficients) and r (representation of rotations as a complex 78 | number) 79 | """ 80 | frames = w.shape[0] 81 | points = w.shape[2] 82 | basis = e.shape[0] 83 | r = np.empty(2) 84 | Ps_reshape = Ps.reshape(2 * points) 85 | w_reshape = w.reshape((frames, points * 2)) 86 | 87 | for i in range(check.size): 88 | c = check[i] 89 | r[0] = np.cos(c) 90 | r[1] = np.sin(c) 91 | grot = camera_r.dot(upgrade_r(r)) 92 | rot = grot[:2] 93 | res[:, :points * 2] = w_reshape 94 | res[:, :points * 2] -= Ps_reshape 95 | proj_e[:, :2 * points] = rot.dot(e).transpose(1, 0, 2).reshape( 96 | e.shape[0], 2 * points) 97 | 98 | if Lambda.size != 0: 99 | proj_e[:, 2 * points:2 * points + basis] = np.diag(Lambda[:Lambda.shape[0] - 1]) 100 | res[:, 2 * points:].fill(0) 101 | res[:, :points * 2] *= Lambda[Lambda.shape[0] - 1] 102 | proj_e[:, :points * 2] *= Lambda[Lambda.shape[0] - 1] 103 | # depth regularizer not used 104 | proj_e[:, 2 * points + basis:] = ((Lambda[Lambda.shape[0] - 1] * 105 | depth_reg) * grot[2]).dot(e) 106 | # we let the person change scale 107 | res[:, 2 * points] = scale_prior 108 | 109 | """ 110 | TODO: PLEASE REVIEW THE FOLLOWING CODE.... 111 | overwrite_a and overwrite_b ARE UNEXPECTED ARGUMENTS OF 112 | scipy.linalg.lstsq 113 | """ 114 | a[i], residue[i], _, _ = scipy.linalg.lstsq( 115 | proj_e.T, res.T, overwrite_a=True, overwrite_b=True) 116 | 117 | # find and return best corresponding solution 118 | best = np.argmin(residue, 0) 119 | assert (best.shape[0] == frames) 120 | theta = check[best] 121 | index = (best, np.arange(frames)) 122 | aa = a.transpose(0, 2, 1)[index] 123 | retres = residue[index] 124 | r = np.empty((2, frames)) 125 | r[0] = np.sin(theta) 126 | r[1] = np.cos(theta) 127 | return aa, r, retres 128 | 129 | 130 | def estimate_a_and_r_with_res_weights( 131 | w, e, s0, camera_r, Lambda, check, a, weights, res, proj_e, 132 | residue, Ps, depth_reg, scale_prior): 133 | """ 134 | TODO: Missing the following parameters in docstring: 135 | - w, e, s0, camera_r, Lambda, check, a, res, proj_e, residue, 136 | Ps, depth_reg, scale_prior 137 | 138 | So local optima are a problem in general. 139 | However: 140 | 141 | 1. This problem is convex in a but not in r, and 142 | 143 | 2. each frame can be solved independently. 144 | 145 | So for each frame, we can do a grid search in r and take the globally 146 | optimal solution. 147 | 148 | In practice, we just brute force over 100 different estimates of r, and 149 | take 150 | the best pair (r,a*(r)) where a*(r) is the optimal minimiser of a given r.
151 | 152 | Arguments: 153 | 154 | w is a 3d measurement matrix of form frames*2*points 155 | 156 | e is a 3d set of basis vectors of form basis*3*points 157 | 158 | s0 is the 3d rest shape of form 3*points 159 | 160 | Lambda are the regularisor coefficients on the coefficients of the 161 | weights 162 | typically generated using PPCA 163 | 164 | interval is how far round the circle we should check for break points 165 | we check every interval*2*pi radians 166 | 167 | Returns: 168 | 169 | a (basis coefficients) and r (representation of rotations as a complex 170 | number) 171 | """ 172 | frames = w.shape[0] 173 | points = w.shape[2] 174 | basis = e.shape[0] 175 | r = np.empty(2) 176 | Ps_reshape = Ps.reshape(2 * points) 177 | w_reshape = w.reshape((frames, points * 2)) 178 | p_copy = np.empty_like(proj_e) 179 | 180 | for i in range(check.size): 181 | c = check[i] 182 | r[0] = np.sin(c) 183 | r[1] = np.cos(c) 184 | grot = camera_r.dot(upgrade_r(r).T) 185 | rot = grot[:2] 186 | rot.dot(s0, Ps) # TODO: remove? 187 | res[:, :points * 2] = w_reshape 188 | res[:, :points * 2] -= Ps_reshape 189 | proj_e[:, :2 * points] = rot.dot(e).transpose(1, 0, 2).reshape( 190 | e.shape[0], 2 * points) 191 | 192 | if Lambda.size != 0: 193 | proj_e[:, 2 * points:2 * points + basis] = np.diag(Lambda[:Lambda.shape[0] - 1]) 194 | res[:, 2 * points:].fill(0) 195 | res[:, :points * 2] *= Lambda[Lambda.shape[0] - 1] 196 | proj_e[:, :points * 2] *= Lambda[Lambda.shape[0] - 1] 197 | proj_e[:, 2 * points + basis:] = ((Lambda[Lambda.shape[0] - 1] * 198 | depth_reg) * grot[2]).dot(e) 199 | res[:, 2 * points] = scale_prior 200 | if weights.size != 0: 201 | res[:, :points * 2] *= weights 202 | for j in range(frames): 203 | p_copy[:] = proj_e 204 | p_copy[:, :points * 2] *= weights[j] 205 | a[i, :, j], comp_residual, _, _ = np.linalg.lstsq( 206 | p_copy.T, res[j].T) 207 | if not comp_residual: 208 | # equations are over-determined 209 | residue[i, j] = 1e-5 210 | else: 211 | residue[i, j] = comp_residual 212 | # find and return best corresponding solution 213 | best = np.argmin(residue, 0) 214 | index = (best, np.arange(frames)) 215 | theta = check[best] 216 | aa = a.transpose(0, 2, 1)[index] 217 | retres = residue[index] 218 | r = np.empty((2, frames)) 219 | r[0] = np.sin(theta) 220 | r[1] = np.cos(theta) 221 | return aa, r, retres 222 | 223 | 224 | def pick_e(w, e, s0, camera_r=None, Lambda=None, 225 | weights=None, scale_prior=-0.0014, interval=0.01, depth_reg=0.0325): 226 | """Brute force over charts from the manifold to find the best one.
227 | Returns best chart index and its a and r coefficients 228 | Returns assignment, and a and r coefficents""" 229 | 230 | camera_r = np.asarray([[1, 0, 0], [0, 0, -1], [0, 1, 0]] 231 | ) if camera_r is None else camera_r 232 | Lambda = np.ones((0, 0)) if Lambda is None else Lambda 233 | weights = np.ones((0, 0, 0)) if weights is None else weights 234 | 235 | charts = e.shape[0] 236 | frames = w.shape[0] 237 | basis = e.shape[1] 238 | points = e.shape[3] 239 | assert (s0.shape[0] == charts) 240 | r = np.empty((charts, 2, frames)) 241 | a = np.empty((charts, frames, e.shape[1])) 242 | score = np.empty((charts, frames)) 243 | check = np.arange(0, 1, interval) * 2 * np.pi 244 | cache_a = np.empty((check.size, basis, frames)) 245 | residue = np.empty((check.size, frames)) 246 | 247 | if Lambda.size != 0: 248 | res = np.zeros((frames, points * 2 + basis + points)) 249 | proj_e = np.zeros((basis, 2 * points + basis + points)) 250 | else: 251 | res = np.empty((frames, points * 2)) 252 | proj_e = np.empty((basis, 2 * points)) 253 | Ps = np.empty((2, points)) 254 | 255 | if weights.size == 0: 256 | for i in range(charts): 257 | if Lambda.size != 0: 258 | a[i], r[i], score[i] = estimate_a_and_r_with_res( 259 | w, e[i], s0[i], camera_r, 260 | Lambda[i], check, cache_a, weights, 261 | res, proj_e, residue, Ps, 262 | depth_reg, scale_prior) 263 | else: 264 | a[i], r[i], score[i] = estimate_a_and_r_with_res( 265 | w, e[i], s0[i], camera_r, Lambda, 266 | check, cache_a, weights, 267 | res, proj_e, residue, Ps, 268 | depth_reg, scale_prior) 269 | else: 270 | w2 = weights.reshape(weights.shape[0], -1) 271 | for i in range(charts): 272 | if Lambda.size != 0: 273 | a[i], r[i], score[i] = estimate_a_and_r_with_res_weights( 274 | w, e[i], s0[i], camera_r, 275 | Lambda[i], check, cache_a, w2, 276 | res, proj_e, residue, Ps, 277 | depth_reg, scale_prior) 278 | else: 279 | a[i], r[i], score[i] = estimate_a_and_r_with_res_weights( 280 | w, e[i], s0[i], camera_r, Lambda, 281 | check, cache_a, w2, 282 | res, proj_e, residue, Ps, 283 | depth_reg, scale_prior) 284 | 285 | remaining_dims = 3 * w.shape[2] - e.shape[1] 286 | assert (np.all(score > 0)) 287 | assert (remaining_dims >= 0) 288 | # Zero problems in log space due to un-regularised first co-efficient 289 | l = Lambda.copy() 290 | l[Lambda == 0] = 1 291 | llambda = -np.log(l) 292 | score /= 2 293 | return score, a, r 294 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/packages/lifting/utils/process.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mar 23 15:29 2017 4 | 5 | @author: Denis Tome' 6 | """ 7 | from __future__ import division 8 | 9 | import os 10 | import json 11 | import numpy as np 12 | from lifting.utils import config 13 | import cv2 14 | import skimage.io 15 | import skimage.transform 16 | import scipy.ndimage as ndimage 17 | import scipy.ndimage.filters as filters 18 | from itertools import compress 19 | from scipy.stats import multivariate_normal 20 | 21 | __all__ = [ 22 | 'detect_objects_heatmap', 23 | 'detect_objects_heatmap', 24 | 'gaussian_kernel', 25 | 'gaussian_heatmap', 26 | 'prepare_input_posenet', 27 | 'detect_parts_heatmaps', 28 | 'import_json', 29 | 'generate_labels', 30 | 'generate_center_map', 31 | 'rescale', 32 | 'crop_image' 33 | ] 34 | 35 | 36 | def detect_objects_heatmap(heatmap): 37 | data = 256 * heatmap 38 | data_max = filters.maximum_filter(data, 3) 39 | maxima = 
(data == data_max) 40 | data_min = filters.minimum_filter(data, 3) 41 | diff = ((data_max - data_min) > 0.3) 42 | maxima[diff == 0] = 0 43 | labeled, num_objects = ndimage.label(maxima) 44 | slices = ndimage.find_objects(labeled) 45 | objects = np.zeros((num_objects, 2), dtype=np.int32) 46 | pidx = 0 47 | for (dy, dx) in slices: 48 | pos = [(dy.start + dy.stop - 1) // 2, (dx.start + dx.stop - 1) // 2] 49 | if heatmap[pos[0], pos[1]] > config.CENTER_TR: 50 | objects[pidx, :] = pos 51 | pidx += 1 52 | return objects[:pidx] 53 | 54 | 55 | def gaussian_kernel(h, w, sigma_h, sigma_w): 56 | yx = np.mgrid[-h // 2:h // 2, -w // 2:w // 2] ** 2 57 | return np.exp(-yx[0, :, :] / sigma_h ** 2 - yx[1, :, :] / sigma_w ** 2) 58 | 59 | 60 | def gaussian_heatmap(h, w, pos_x, pos_y, sigma_h=1, sigma_w=1, init=None): 61 | """ 62 | Compute the heat-map of size (w x h) with a gaussian distribution fit in 63 | position (pos_x, pos_y) and a convariance matix defined by the related 64 | sigma values. 65 | The resulting heat-map can be summed to a given heat-map init. 66 | """ 67 | init = init if init is not None else [] 68 | 69 | cov_matrix = np.eye(2) * ([sigma_h**2, sigma_w**2]) 70 | 71 | x, y = np.mgrid[0:h, 0:w] 72 | pos = np.dstack((x, y)) 73 | rv = multivariate_normal([pos_x, pos_y], cov_matrix) 74 | 75 | tmp = rv.pdf(pos) 76 | hmap = np.multiply( 77 | tmp, np.sqrt(np.power(2 * np.pi, 2) * np.linalg.det(cov_matrix)) 78 | ) 79 | idx = np.where(hmap.flatten() <= np.exp(-4.6052)) 80 | hmap.flatten()[idx] = 0 81 | 82 | if np.size(init) == 0: 83 | return hmap 84 | 85 | assert (np.shape(init) == hmap.shape) 86 | hmap += init 87 | idx = np.where(hmap.flatten() > 1) 88 | hmap.flatten()[idx] = 1 89 | return hmap 90 | 91 | 92 | def prepare_input_posenet(image, objects, size_person, size, sigma=25, 93 | max_num_objects=16, border=400): 94 | result = np.zeros((max_num_objects, size[0], size[1], 4)) 95 | padded_image = np.zeros( 96 | (1, size_person[0] + border, size_person[1] + border, 4)) 97 | padded_image[0, border // 2:-border // 2, 98 | border // 2:-border // 2, :3] = image 99 | assert len(objects) < max_num_objects 100 | for oid, (yc, xc) in enumerate(objects): 101 | dh, dw = size[0] // 2, size[1] // 2 102 | y0, x0, y1, x1 = np.array( 103 | [yc - dh, xc - dw, yc + dh, xc + dw]) + border // 2 104 | result[oid, :, :, :4] = padded_image[:, y0:y1, x0:x1, :] 105 | result[oid, :, :, 3] = gaussian_kernel(size[0], size[1], sigma, sigma) 106 | return np.split(result, [3], 3) 107 | 108 | 109 | def detect_parts_heatmaps(heatmaps, centers, size, num_parts=14): 110 | """ 111 | Given heat-maps find the position of each joint by means of n argmax 112 | function 113 | """ 114 | parts = np.zeros((len(centers), num_parts, 2), dtype=np.int32) 115 | visible = np.zeros((len(centers), num_parts), dtype=bool) 116 | for oid, (yc, xc) in enumerate(centers): 117 | part_hmap = skimage.transform.resize( 118 | np.clip(heatmaps[oid], -1, 1), size) 119 | for pid in range(num_parts): 120 | y, x = np.unravel_index(np.argmax(part_hmap[:, :, pid]), size) 121 | parts[oid, pid] = y + yc - size[0] // 2, x + xc - size[1] // 2 122 | visible[oid, pid] = np.mean( 123 | part_hmap[:, :, pid]) > config.VISIBLE_PART 124 | return parts, visible 125 | 126 | 127 | def import_json(path='json/MPI_annotations.json', order='json/MPI_order.npy'): 128 | """Get the json file containing the dataset. 129 | We want the data to be shuffled, however the training has to be repeatable. 
130 | This means that once shuffled the order has to be maintained.""" 131 | with open(path) as data_file: 132 | data_this = json.load(data_file) 133 | data_this = np.array(data_this['root']) 134 | num_samples = len(data_this) 135 | 136 | if os.path.exists(order): 137 | idx = np.load(order) 138 | else: 139 | idx = np.random.permutation(num_samples).tolist() 140 | np.save(order, idx) 141 | 142 | is_not_validation = [not data_this[i]['isValidation'] 143 | for i in range(num_samples)] 144 | keep_data_idx = list(compress(idx, is_not_validation)) 145 | 146 | data = data_this[keep_data_idx] 147 | return data, len(keep_data_idx) 148 | 149 | 150 | def generate_labels(image_shape, joint_positions, num_other_people, 151 | joints_other_people, offset): 152 | """ 153 | Given as input a set of joint positions and the size of the input image 154 | it generates 155 | a set of heat-maps of the same size. It generates both heat-maps used as 156 | labels for the first stage (label_1st_lower) and for all the other stages 157 | (label_lower). 158 | """ 159 | _FILTER_JOINTS = np.array([9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5]) 160 | 161 | img_height, img_width, _ = image_shape 162 | heat_maps_single_p = np.zeros( 163 | (config.NUM_OUTPUT, config.INPUT_SIZE, config.INPUT_SIZE)) 164 | heat_maps_other_p = np.zeros( 165 | (config.NUM_OUTPUT, config.INPUT_SIZE, config.INPUT_SIZE)) 166 | 167 | # generate first set of heat-maps 168 | for i in range(config.NUM_JOINTS): 169 | # the set of joints can be different from the one in the json file 170 | curr_joint = joint_positions[_FILTER_JOINTS[i]] 171 | skip = (curr_joint[0] < 0 or curr_joint[1] < 0 or 172 | curr_joint[0] >= img_width or curr_joint[1] >= img_height) 173 | if not skip: 174 | heat_maps_single_p[i] = gaussian_heatmap( 175 | config.INPUT_SIZE, config.INPUT_SIZE, 176 | curr_joint[ 177 | 1] - offset[1], curr_joint[0] - offset[0], 178 | config.SIGMA, config.SIGMA) 179 | 180 | heat_maps_other_p[i] = gaussian_heatmap( 181 | config.INPUT_SIZE, config.INPUT_SIZE, 182 | curr_joint[ 183 | 1] - offset[1], curr_joint[0] - offset[0], 184 | config.SIGMA, config.SIGMA) 185 | 186 | heat_maps_single_p[-1] = np.maximum( 187 | 1 - np.max(heat_maps_single_p[:-1], axis=0), 188 | np.zeros((config.INPUT_SIZE, config.INPUT_SIZE))) 189 | heat_maps_single_p = np.transpose(heat_maps_single_p, (1, 2, 0)) 190 | 191 | # generate second set of heat-maps for other people in the image 192 | for p in range(int(num_other_people)): 193 | for i in range(config.NUM_JOINTS): 194 | # the set of joints can be different from the one in the json file 195 | try: 196 | if num_other_people == 1: 197 | curr_joint = joints_other_people[_FILTER_JOINTS[i]] 198 | else: 199 | curr_joint = joints_other_people[p][_FILTER_JOINTS[i]] 200 | skip = ( 201 | curr_joint[0] < 0 or curr_joint[1] < 0 or 202 | curr_joint[0] >= img_width or curr_joint[1] >= img_height) 203 | except IndexError: 204 | skip = True 205 | if not skip: 206 | heat_maps_other_p[i] = gaussian_heatmap( 207 | config.INPUT_SIZE, config.INPUT_SIZE, 208 | curr_joint[1] - offset[1], curr_joint[0] - offset[0], 209 | config.SIGMA, config.SIGMA, init=heat_maps_other_p[i]) 210 | 211 | heat_maps_other_p[-1] = np.maximum( 212 | 1 - np.max(heat_maps_other_p[:-1], axis=0), 213 | np.zeros((config.INPUT_SIZE, config.INPUT_SIZE))) 214 | 215 | heat_maps_other_p = np.transpose(heat_maps_other_p, (1, 2, 0)) 216 | 217 | # rescaling heat-maps according to the right shape 218 | labels_single = rescale(heat_maps_single_p, config.OUTPUT_SIZE) 219 | labels_people =
rescale(heat_maps_other_p, config.OUTPUT_SIZE) 220 | return labels_people, labels_single 221 | 222 | 223 | def generate_center_map(center_pos, img_shape): 224 | """ 225 | Given the position of the person and the size of the input image it 226 | generates 227 | a heat-map where a gaissian distribution is fit in the position of the 228 | person in the image. 229 | """ 230 | img_height = img_shape 231 | img_width = img_shape 232 | center_map = gaussian_heatmap( 233 | img_height, img_width, center_pos[1], center_pos[0], 234 | config.SIGMA_CENTER, config.SIGMA_CENTER) 235 | return center_map 236 | 237 | 238 | def rescale(data, new_size): 239 | """Rescale data to a fixed dimension, regardless the number of channels. 240 | Data has to be in the format (h,w,c).""" 241 | if data.ndim > 2: 242 | assert data.shape[2] < data.shape[0] 243 | assert data.shape[2] < data.shape[1] 244 | resized_data = cv2.resize( 245 | data, (new_size, new_size), interpolation=cv2.INTER_CUBIC) 246 | return resized_data 247 | 248 | 249 | def crop_image(image, obj_pose): 250 | """ 251 | Crop the image in order to have the person at the center and the final 252 | image size 253 | is the same as the expected CNN input size. 254 | Returns the cropped image and the offset that is used to update the joint 255 | positions. 256 | """ 257 | offset_left = int(obj_pose[0] - config.INPUT_SIZE // 2) 258 | offset_up = int(obj_pose[1] - config.INPUT_SIZE // 2) 259 | # just for checking that it's inside the image 260 | offset_right = int(image.shape[1] - obj_pose[0] - config.INPUT_SIZE // 2) 261 | offset_bottom = int(image.shape[0] - obj_pose[1] - config.INPUT_SIZE // 2) 262 | 263 | pad_left, pad_right, pad_up, pad_bottom = 0, 0, 0, 0 264 | if offset_left < 0: 265 | pad_left = -offset_left 266 | if offset_right < 0: 267 | pad_right = -offset_right 268 | if offset_up < 0: 269 | pad_up = -offset_up 270 | if offset_bottom < 0: 271 | pad_bottom = -offset_bottom 272 | padded_image = np.lib.pad( 273 | image, ((pad_up, pad_bottom), (pad_left, pad_right), (0, 0)), 274 | 'constant', constant_values=((0, 0), (0, 0), (0, 0))) 275 | 276 | cropped_image = padded_image[ 277 | offset_up + pad_up: offset_up + pad_up + config.INPUT_SIZE, 278 | offset_left + pad_left: offset_left + pad_left + config.INPUT_SIZE] 279 | 280 | return cropped_image, np.array([offset_left, offset_up]) 281 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/models/fcrn.py: -------------------------------------------------------------------------------- 1 | from .network import Network 2 | 3 | class ResNet50UpProj(Network): 4 | def setup(self): 5 | (self.feed('data') 6 | .conv(7, 7, 64, 2, 2, relu=False, name='conv1') 7 | .batch_normalization(relu=True, name='bn_conv1') 8 | .max_pool(3, 3, 2, 2, name='pool1') 9 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1') 10 | .batch_normalization(name='bn2a_branch1')) 11 | 12 | (self.feed('pool1') 13 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a') 14 | .batch_normalization(relu=True, name='bn2a_branch2a') 15 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b') 16 | .batch_normalization(relu=True, name='bn2a_branch2b') 17 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c') 18 | .batch_normalization(name='bn2a_branch2c')) 19 | 20 | (self.feed('bn2a_branch1', 21 | 'bn2a_branch2c') 22 | .add(name='res2a') 23 | .relu(name='res2a_relu') 24 | .conv(1, 1, 64, 1, 1, biased=False, 
relu=False, name='res2b_branch2a') 25 | .batch_normalization(relu=True, name='bn2b_branch2a') 26 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b') 27 | .batch_normalization(relu=True, name='bn2b_branch2b') 28 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c') 29 | .batch_normalization(name='bn2b_branch2c')) 30 | 31 | (self.feed('res2a_relu', 32 | 'bn2b_branch2c') 33 | .add(name='res2b') 34 | .relu(name='res2b_relu') 35 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a') 36 | .batch_normalization(relu=True, name='bn2c_branch2a') 37 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b') 38 | .batch_normalization(relu=True, name='bn2c_branch2b') 39 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c') 40 | .batch_normalization(name='bn2c_branch2c')) 41 | 42 | (self.feed('res2b_relu', 43 | 'bn2c_branch2c') 44 | .add(name='res2c') 45 | .relu(name='res2c_relu') 46 | .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1') 47 | .batch_normalization(name='bn3a_branch1')) 48 | 49 | (self.feed('res2c_relu') 50 | .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a') 51 | .batch_normalization(relu=True, name='bn3a_branch2a') 52 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b') 53 | .batch_normalization(relu=True, name='bn3a_branch2b') 54 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c') 55 | .batch_normalization(name='bn3a_branch2c')) 56 | 57 | (self.feed('bn3a_branch1', 58 | 'bn3a_branch2c') 59 | .add(name='res3a') 60 | .relu(name='res3a_relu') 61 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b_branch2a') 62 | .batch_normalization(relu=True, name='bn3b_branch2a') 63 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b_branch2b') 64 | .batch_normalization(relu=True, name='bn3b_branch2b') 65 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b_branch2c') 66 | .batch_normalization(name='bn3b_branch2c')) 67 | 68 | (self.feed('res3a_relu', 69 | 'bn3b_branch2c') 70 | .add(name='res3b') 71 | .relu(name='res3b_relu') 72 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3c_branch2a') 73 | .batch_normalization(relu=True, name='bn3c_branch2a') 74 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3c_branch2b') 75 | .batch_normalization(relu=True, name='bn3c_branch2b') 76 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3c_branch2c') 77 | .batch_normalization(name='bn3c_branch2c')) 78 | 79 | (self.feed('res3b_relu', 80 | 'bn3c_branch2c') 81 | .add(name='res3c') 82 | .relu(name='res3c_relu') 83 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3d_branch2a') 84 | .batch_normalization(relu=True, name='bn3d_branch2a') 85 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3d_branch2b') 86 | .batch_normalization(relu=True, name='bn3d_branch2b') 87 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3d_branch2c') 88 | .batch_normalization(name='bn3d_branch2c')) 89 | 90 | (self.feed('res3c_relu', 91 | 'bn3d_branch2c') 92 | .add(name='res3d') 93 | .relu(name='res3d_relu') 94 | .conv(1, 1, 1024, 2, 2, biased=False, relu=False, name='res4a_branch1') 95 | .batch_normalization(name='bn4a_branch1')) 96 | 97 | (self.feed('res3d_relu') 98 | .conv(1, 1, 256, 2, 2, biased=False, relu=False, name='res4a_branch2a') 99 | .batch_normalization(relu=True, name='bn4a_branch2a') 100 | .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4a_branch2b') 101 | 
.batch_normalization(relu=True, name='bn4a_branch2b') 102 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c') 103 | .batch_normalization(name='bn4a_branch2c')) 104 | 105 | (self.feed('bn4a_branch1', 106 | 'bn4a_branch2c') 107 | .add(name='res4a') 108 | .relu(name='res4a_relu') 109 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b_branch2a') 110 | .batch_normalization(relu=True, name='bn4b_branch2a') 111 | .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4b_branch2b') 112 | .batch_normalization(relu=True, name='bn4b_branch2b') 113 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b_branch2c') 114 | .batch_normalization(name='bn4b_branch2c')) 115 | 116 | (self.feed('res4a_relu', 117 | 'bn4b_branch2c') 118 | .add(name='res4b') 119 | .relu(name='res4b_relu') 120 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4c_branch2a') 121 | .batch_normalization(relu=True, name='bn4c_branch2a') 122 | .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4c_branch2b') 123 | .batch_normalization(relu=True, name='bn4c_branch2b') 124 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4c_branch2c') 125 | .batch_normalization(name='bn4c_branch2c')) 126 | 127 | (self.feed('res4b_relu', 128 | 'bn4c_branch2c') 129 | .add(name='res4c') 130 | .relu(name='res4c_relu') 131 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4d_branch2a') 132 | .batch_normalization(relu=True, name='bn4d_branch2a') 133 | .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4d_branch2b') 134 | .batch_normalization(relu=True, name='bn4d_branch2b') 135 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4d_branch2c') 136 | .batch_normalization(name='bn4d_branch2c')) 137 | 138 | (self.feed('res4c_relu', 139 | 'bn4d_branch2c') 140 | .add(name='res4d') 141 | .relu(name='res4d_relu') 142 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4e_branch2a') 143 | .batch_normalization(relu=True, name='bn4e_branch2a') 144 | .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4e_branch2b') 145 | .batch_normalization(relu=True, name='bn4e_branch2b') 146 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4e_branch2c') 147 | .batch_normalization(name='bn4e_branch2c')) 148 | 149 | (self.feed('res4d_relu', 150 | 'bn4e_branch2c') 151 | .add(name='res4e') 152 | .relu(name='res4e_relu') 153 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4f_branch2a') 154 | .batch_normalization(relu=True, name='bn4f_branch2a') 155 | .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4f_branch2b') 156 | .batch_normalization(relu=True, name='bn4f_branch2b') 157 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4f_branch2c') 158 | .batch_normalization(name='bn4f_branch2c')) 159 | 160 | (self.feed('res4e_relu', 161 | 'bn4f_branch2c') 162 | .add(name='res4f') 163 | .relu(name='res4f_relu') 164 | .conv(1, 1, 2048, 2, 2, biased=False, relu=False, name='res5a_branch1') 165 | .batch_normalization(name='bn5a_branch1')) 166 | 167 | (self.feed('res4f_relu') 168 | .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res5a_branch2a') 169 | .batch_normalization(relu=True, name='bn5a_branch2a') 170 | .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5a_branch2b') 171 | .batch_normalization(relu=True, name='bn5a_branch2b') 172 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c') 173 | .batch_normalization(name='bn5a_branch2c')) 174 | 175 | (self.feed('bn5a_branch1', 176 | 'bn5a_branch2c') 177 | 
.add(name='res5a') 178 | .relu(name='res5a_relu') 179 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a') 180 | .batch_normalization(relu=True, name='bn5b_branch2a') 181 | .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5b_branch2b') 182 | .batch_normalization(relu=True, name='bn5b_branch2b') 183 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c') 184 | .batch_normalization(name='bn5b_branch2c')) 185 | 186 | (self.feed('res5a_relu', 187 | 'bn5b_branch2c') 188 | .add(name='res5b') 189 | .relu(name='res5b_relu') 190 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a') 191 | .batch_normalization(relu=True, name='bn5c_branch2a') 192 | .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5c_branch2b') 193 | .batch_normalization(relu=True, name='bn5c_branch2b') 194 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c') 195 | .batch_normalization(name='bn5c_branch2c')) 196 | 197 | (self.feed('res5b_relu', 198 | 'bn5c_branch2c') 199 | .add(name='res5c') 200 | .relu(name='res5c_relu') 201 | .conv(1, 1, 1024, 1, 1, biased=True, relu=False, name='layer1') 202 | .batch_normalization(relu=False, name='layer1_BN') 203 | .up_project([3, 3, 1024, 512], id = '2x', stride = 1, BN=True) 204 | .up_project([3, 3, 512, 256], id = '4x', stride = 1, BN=True) 205 | .up_project([3, 3, 256, 128], id = '8x', stride = 1, BN=True) 206 | .up_project([3, 3, 128, 64], id = '16x', stride = 1, BN=True) 207 | .dropout(name = 'drop', keep_prob = 1.) 208 | .conv(3, 3, 1, 1, 1, name = 'ConvPred')) 209 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/openpose_3dpose_sandbox.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib.gridspec as gridspec 4 | import tensorflow as tf 5 | import data_utils 6 | import viz 7 | import re 8 | import cameras 9 | import json 10 | import os 11 | from predict_3dpose import create_model 12 | import cv2 13 | import imageio 14 | import logging 15 | FLAGS = tf.app.flags.FLAGS 16 | 17 | order = [15, 12, 25, 26, 27, 17, 18, 19, 1, 2, 3, 6, 7, 8] 18 | 19 | logging.basicConfig(level=logging.INFO) 20 | logger = logging.getLogger(__name__) 21 | 22 | def show_anim_curves(anim_dict, _plt): 23 | val = np.array(list(anim_dict.values())) 24 | for o in range(0,36,2): 25 | x = val[:,o] 26 | y = val[:,o+1] 27 | _plt.plot(x, 'r--', linewidth=0.2) 28 | _plt.plot(y, 'g', linewidth=0.2) 29 | return _plt 30 | 31 | def read_openpose_json(smooth=True, *args): 32 | # openpose output format: 33 | # [x1,y1,c1,x2,y2,c2,...] 34 | # ignore confidence score, take x and y [x1,y1,x2,y2,...] 35 | 36 | logger.info("start reading data") 37 | #load json files 38 | json_files = os.listdir(openpose_output_dir) 39 | # check for other file types 40 | json_files = sorted([filename for filename in json_files if filename.endswith(".json")]) 41 | cache = {} 42 | smoothed = {} 43 | ### extract x,y and ignore confidence score 44 | for file_name in json_files: 45 | logger.debug("reading {0}".format(file_name)) 46 | _file = os.path.join(openpose_output_dir, file_name) 47 | if not os.path.isfile(_file): raise Exception("No file found!!, {0}".format(_file)) 48 | data = json.load(open(_file)) 49 | #take first person 50 | _data = data["people"][0]["pose_keypoints"] 51 | xy = [] 52 | #ignore confidence score 53 | for o in range(0,len(_data),3): 54 | xy.append(_data[o]) 55 | xy.append(_data[o+1]) 56 | 57 | # get frame index from openpose 12 padding 58 | frame_indx = re.findall("(\d+)", file_name) 59 | logger.debug("found {0} for frame {1}".format(xy, str(int(frame_indx[0])))) 60 | #add xy to frame 61 | cache[int(frame_indx[0])] = xy 62 | plt.figure(1) 63 | drop_curves_plot = show_anim_curves(cache, plt) 64 | pngName = 'png/dirty_plot.png' 65 | drop_curves_plot.savefig(pngName) 66 | 67 | # exit if no smoothing 68 | if not smooth: 69 | # return frames cache incl. 18 joints (x,y) 70 | return cache 71 | 72 | if len(json_files) == 1: 73 | logger.info("found single json file") 74 | # return frames cache incl. 
18 joints (x,y) on single image\json 75 | return cache 76 | 77 | if len(json_files) <= 8: 78 | raise Exception("need more frames, min 9 frames/json files for smoothing!!!") 79 | 80 | logger.info("start smoothing") 81 | 82 | # create frame blocks 83 | first_frame_block = [int(re.findall("(\d+)", o)[0]) for o in json_files[:4]] 84 | last_frame_block = [int(re.findall("(\d+)", o)[0]) for o in json_files[-4:]] 85 | 86 | ### smooth by median value, n frames 87 | for frame, xy in cache.items(): 88 | 89 | # create neighbor array based on frame index 90 | forward, back = ([] for _ in range(2)) 91 | 92 | # joints x,y array 93 | _len = len(xy) # 36 94 | 95 | # create array of parallel frames (-33) 96 | for neighbor in range(1,4): 97 | # first n frames, get value of xy in postive lookahead frames(current frame + 3) 98 | if frame in first_frame_block: 99 | forward += cache[frame+neighbor] 100 | # last n frames, get value of xy in negative lookahead frames(current frame - 3) 101 | elif frame in last_frame_block: 102 | back += cache[frame-neighbor] 103 | else: 104 | # between frames, get value of xy in bi-directional frames(current frame -+ 3) 105 | forward += cache[frame+neighbor] 106 | back += cache[frame-neighbor] 107 | 108 | # build frame range vector 109 | frames_joint_median = [0 for i in range(_len)] 110 | # more info about mapping in src/data_utils.py 111 | # for each 18joints*x,y (x1,y1,x2,y2,...)~36 112 | for x in range(0,_len,2): 113 | # set x and y 114 | y = x+1 115 | if frame in first_frame_block: 116 | # get vector of n frames forward for x and y, incl. current frame 117 | x_v = [xy[x], forward[x], forward[x+_len], forward[x+_len*2]] 118 | y_v = [xy[y], forward[y], forward[y+_len], forward[y+_len*2]] 119 | elif frame in last_frame_block: 120 | # get vector of n frames back for x and y, incl. current frame 121 | x_v =[xy[x], back[x], back[x+_len], back[x+_len*2]] 122 | y_v =[xy[y], back[y], back[y+_len], back[y+_len*2]] 123 | else: 124 | # get vector of n frames forward/back for x and y, incl. current frame 125 | # median value calc: find neighbor frames joint value and sorted them, use numpy median module 126 | # frame[x1,y1,[x2,y2],..]frame[x1,y1,[x2,y2],...], frame[x1,y1,[x2,y2],..] 127 | # ^---------------------|-------------------------^ 128 | x_v =[xy[x], forward[x], forward[x+_len], forward[x+_len*2], 129 | back[x], back[x+_len], back[x+_len*2]] 130 | y_v =[xy[y], forward[y], forward[y+_len], forward[y+_len*2], 131 | back[y], back[y+_len], back[y+_len*2]] 132 | 133 | # get median of vector 134 | x_med = np.median(sorted(x_v)) 135 | y_med = np.median(sorted(y_v)) 136 | 137 | # holding frame drops for joint 138 | if not x_med: 139 | # allow fix from first frame 140 | if frame: 141 | # get x from last frame 142 | x_med = smoothed[frame-1][x] 143 | # if joint is hidden y 144 | if not y_med: 145 | # allow fix from first frame 146 | if frame: 147 | # get y from last frame 148 | y_med = smoothed[frame-1][y] 149 | 150 | logger.debug("old X {0} sorted neighbor {1} new X {2}".format(xy[x],sorted(x_v), x_med)) 151 | logger.debug("old Y {0} sorted neighbor {1} new Y {2}".format(xy[y],sorted(y_v), y_med)) 152 | 153 | # build new array of joint x and y value 154 | frames_joint_median[x] = x_med 155 | frames_joint_median[x+1] = y_med 156 | 157 | 158 | smoothed[frame] = frames_joint_median 159 | 160 | # return frames cache incl. smooth 18 joints (x,y) 161 | return smoothed 162 | 163 | def main(_): 164 | smoothed = read_openpose_json() 165 | logger.info("reading and smoothing done. 
start feeding 3d-pose-baseline") 166 | plt.figure(2) 167 | smooth_curves_plot = show_anim_curves(smoothed, plt) 168 | pngName = 'png/smooth_plot.png' 169 | smooth_curves_plot.savefig(pngName) 170 | 171 | enc_in = np.zeros((1, 64)) 172 | enc_in[0] = [0 for i in range(64)] 173 | 174 | actions = data_utils.define_actions(FLAGS.action) 175 | 176 | SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11] 177 | rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS) 178 | train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions( 179 | actions, FLAGS.data_dir) 180 | train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data( 181 | actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14) 182 | 183 | device_count = {"GPU": 1} 184 | png_lib = [] 185 | with tf.Session(config=tf.ConfigProto( 186 | device_count=device_count, 187 | allow_soft_placement=True)) as sess: 188 | #plt.figure(3) 189 | batch_size = 128 190 | model = create_model(sess, actions, batch_size) 191 | for n, (frame, xy) in enumerate(smoothed.items()): 192 | logger.info("calc frame {0}".format(frame)) 193 | # map list into np array 194 | joints_array = np.zeros((1, 36)) 195 | joints_array[0] = [0 for i in range(36)] 196 | for o in range(len(joints_array[0])): 197 | #feed array with xy array 198 | joints_array[0][o] = xy[o] 199 | _data = joints_array[0] 200 | # mapping all body parts or 3d-pose-baseline format 201 | for i in range(len(order)): 202 | for j in range(2): 203 | # create encoder input 204 | enc_in[0][order[i] * 2 + j] = _data[i * 2 + j] 205 | for j in range(2): 206 | # Hip 207 | enc_in[0][0 * 2 + j] = (enc_in[0][1 * 2 + j] + enc_in[0][6 * 2 + j]) / 2 208 | # Neck/Nose 209 | enc_in[0][14 * 2 + j] = (enc_in[0][15 * 2 + j] + enc_in[0][12 * 2 + j]) / 2 210 | # Thorax 211 | enc_in[0][13 * 2 + j] = 2 * enc_in[0][12 * 2 + j] - enc_in[0][14 * 2 + j] 212 | 213 | # set spine 214 | spine_x = enc_in[0][24] 215 | spine_y = enc_in[0][25] 216 | 217 | enc_in = enc_in[:, dim_to_use_2d] 218 | mu = data_mean_2d[dim_to_use_2d] 219 | stddev = data_std_2d[dim_to_use_2d] 220 | enc_in = np.divide((enc_in - mu), stddev) 221 | 222 | dp = 1.0 223 | dec_out = np.zeros((1, 48)) 224 | dec_out[0] = [0 for i in range(48)] 225 | _, _, poses3d = model.step(sess, enc_in, dec_out, dp, isTraining=False) 226 | all_poses_3d = [] 227 | enc_in = data_utils.unNormalizeData(enc_in, data_mean_2d, data_std_2d, dim_to_ignore_2d) 228 | poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d) 229 | gs1 = gridspec.GridSpec(1, 1) 230 | gs1.update(wspace=-0.00, hspace=0.05) # set the spacing between axes. 
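# The remainder of this loop converts each prediction for plotting: y and z are swapped for every joint, the new z is mirrored inside its min/max range, x and z are re-centred on the spine position captured above (using the fixed 630/500 offsets), and the pose is then drawn with viz.show3Dpose and saved as a PNG frame for the final GIF.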
231 | plt.axis('off') 232 | all_poses_3d.append( poses3d ) 233 | enc_in, poses3d = map( np.vstack, [enc_in, all_poses_3d] ) 234 | subplot_idx, exidx = 1, 1 235 | max = 0 236 | min = 10000 237 | 238 | for i in range(poses3d.shape[0]): 239 | for j in range(32): 240 | tmp = poses3d[i][j * 3 + 2] 241 | poses3d[i][j * 3 + 2] = poses3d[i][j * 3 + 1] 242 | poses3d[i][j * 3 + 1] = tmp 243 | if poses3d[i][j * 3 + 2] > max: 244 | max = poses3d[i][j * 3 + 2] 245 | if poses3d[i][j * 3 + 2] < min: 246 | min = poses3d[i][j * 3 + 2] 247 | 248 | for i in range(poses3d.shape[0]): 249 | for j in range(32): 250 | poses3d[i][j * 3 + 2] = max - poses3d[i][j * 3 + 2] + min 251 | poses3d[i][j * 3] += (spine_x - 630) 252 | poses3d[i][j * 3 + 2] += (500 - spine_y) 253 | 254 | # Plot 3d predictions 255 | ax = plt.subplot(gs1[subplot_idx - 1], projection='3d') 256 | ax.view_init(18, -70) 257 | logger.debug(np.min(poses3d)) 258 | if np.min(poses3d) < -1000: 259 | poses3d = before_pose 260 | 261 | p3d = poses3d 262 | logger.debug(poses3d) 263 | viz.show3Dpose(p3d, ax, lcolor="#9b59b6", rcolor="#2ecc71") 264 | 265 | pngName = 'png/test_{0}.png'.format(str(frame)) 266 | plt.savefig(pngName) 267 | png_lib.append(imageio.imread(pngName)) 268 | before_pose = poses3d 269 | 270 | 271 | logger.info("creating Gif png/movie_smoothing.gif, please Wait!") 272 | imageio.mimsave('png/movie_smoothing.gif', png_lib, fps=FLAGS.gif_fps) 273 | logger.info("Done!".format(pngName)) 274 | 275 | 276 | if __name__ == "__main__": 277 | 278 | openpose_output_dir = FLAGS.openpose 279 | 280 | level = {0:logging.ERROR, 281 | 1:logging.WARNING, 282 | 2:logging.INFO, 283 | 3:logging.DEBUG} 284 | 285 | logger.setLevel(level[FLAGS.verbose]) 286 | 287 | 288 | tf.app.run() -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_vmd/linear_model.py: -------------------------------------------------------------------------------- 1 | 2 | """Simple model to regress 3d human poses from 2d joint locations""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | from tensorflow.python.ops import variable_scope as vs 9 | 10 | import os 11 | import numpy as np 12 | from six.moves import xrange # pylint: disable=redefined-builtin 13 | import tensorflow as tf 14 | from . import data_utils 15 | from . import cameras as cam 16 | 17 | def kaiming(shape, dtype, partition_info=None): 18 | """Kaiming initialization as described in https://arxiv.org/pdf/1502.01852.pdf 19 | 20 | Args 21 | shape: dimensions of the tf array to initialize 22 | dtype: data type of the array 23 | partition_info: (Optional) info about how the variable is partitioned. 24 | See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/init_ops.py#L26 25 | Needed to be used as an initializer. 26 | Returns 27 | Tensorflow array with initial weights 28 | """ 29 | return(tf.truncated_normal(shape, dtype=dtype)*tf.sqrt(2/float(shape[0]))) 30 | 31 | class LinearModel(object): 32 | """ A simple Linear+RELU model """ 33 | 34 | def __init__(self, 35 | linear_size, 36 | num_layers, 37 | residual, 38 | batch_norm, 39 | max_norm, 40 | batch_size, 41 | learning_rate, 42 | summaries_dir, 43 | predict_14=False, 44 | dtype=tf.float32): 45 | """Creates the linear + relu model 46 | 47 | Args 48 | linear_size: integer. number of units in each layer of the model 49 | num_layers: integer. number of bilinear blocks in the model 50 | residual: boolean. 
Whether to add residual connections 51 | batch_norm: boolean. Whether to use batch normalization 52 | max_norm: boolean. Whether to clip weights to a norm of 1 53 | batch_size: integer. The size of the batches used during training 54 | learning_rate: float. Learning rate to start with 55 | summaries_dir: String. Directory where to log progress 56 | predict_14: boolean. Whether to predict 14 instead of 17 joints 57 | dtype: the data type to use to store internal variables 58 | """ 59 | 60 | # There are in total 17 joints in H3.6M and 16 in MPII (and therefore in stacked 61 | # hourglass detections). We settled with 16 joints in 2d just to make models 62 | # compatible (e.g. you can train on ground truth 2d and test on SH detections). 63 | # This does not seem to have an effect on prediction performance. 64 | self.HUMAN_2D_SIZE = 16 * 2 65 | 66 | # In 3d all the predictions are zero-centered around the root (hip) joint, so 67 | # we actually predict only 16 joints. The error is still computed over 17 joints, 68 | # because if one uses, e.g. Procrustes alignment, there is still error in the 69 | # hip to account for! 70 | # There is also an option to predict only 14 joints, which makes our results 71 | # directly comparable to those in https://arxiv.org/pdf/1611.09010.pdf 72 | self.HUMAN_3D_SIZE = 14 * 3 if predict_14 else 16 * 3 73 | 74 | self.input_size = self.HUMAN_2D_SIZE 75 | self.output_size = self.HUMAN_3D_SIZE 76 | 77 | self.isTraining = tf.placeholder(tf.bool,name="isTrainingflag") 78 | self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") 79 | 80 | # Summary writers for train and test runs 81 | self.train_writer = tf.summary.FileWriter( os.path.join(summaries_dir, 'train' )) 82 | self.test_writer = tf.summary.FileWriter( os.path.join(summaries_dir, 'test' )) 83 | 84 | self.linear_size = linear_size 85 | self.batch_size = batch_size 86 | self.learning_rate = tf.Variable( float(learning_rate), trainable=False, dtype=dtype, name="learning_rate") 87 | self.global_step = tf.Variable(0, trainable=False, name="global_step") 88 | decay_steps = 100000 # empirical 89 | decay_rate = 0.96 # empirical 90 | self.learning_rate = tf.train.exponential_decay(self.learning_rate, self.global_step, decay_steps, decay_rate) 91 | 92 | # === Transform the inputs === 93 | with vs.variable_scope("inputs"): 94 | 95 | # in=2d poses, out=3d poses 96 | enc_in = tf.placeholder(dtype, shape=[None, self.input_size], name="enc_in") 97 | dec_out = tf.placeholder(dtype, shape=[None, self.output_size], name="dec_out") 98 | 99 | self.encoder_inputs = enc_in 100 | self.decoder_outputs = dec_out 101 | 102 | # === Create the linear + relu combos === 103 | with vs.variable_scope( "linear_model" ): 104 | 105 | # === First layer, brings dimensionality up to linear_size === 106 | w1 = tf.get_variable( name="w1", initializer=kaiming, shape=[self.HUMAN_2D_SIZE, linear_size], dtype=dtype ) 107 | b1 = tf.get_variable( name="b1", initializer=kaiming, shape=[linear_size], dtype=dtype ) 108 | w1 = tf.clip_by_norm(w1,1) if max_norm else w1 109 | y3 = tf.matmul( enc_in, w1 ) + b1 110 | 111 | if batch_norm: 112 | y3 = tf.layers.batch_normalization(y3,training=self.isTraining, name="batch_normalization") 113 | y3 = tf.nn.relu( y3 ) 114 | y3 = tf.nn.dropout( y3, self.dropout_keep_prob ) 115 | 116 | # === Create multiple bi-linear layers === 117 | for idx in range( num_layers ): 118 | y3 = self.two_linear( y3, linear_size, residual, self.dropout_keep_prob, max_norm, batch_norm, dtype, idx ) 119 | 120 | # === Last 
linear layer has HUMAN_3D_SIZE in output === 121 | w4 = tf.get_variable( name="w4", initializer=kaiming, shape=[linear_size, self.HUMAN_3D_SIZE], dtype=dtype ) 122 | b4 = tf.get_variable( name="b4", initializer=kaiming, shape=[self.HUMAN_3D_SIZE], dtype=dtype ) 123 | w4 = tf.clip_by_norm(w4,1) if max_norm else w4 124 | y = tf.matmul(y3, w4) + b4 125 | # === End linear model === 126 | 127 | # Store the outputs here 128 | self.outputs = y 129 | self.loss = tf.reduce_mean(tf.square(y - dec_out)) 130 | self.loss_summary = tf.summary.scalar('loss/loss', self.loss) 131 | 132 | # To keep track of the loss in mm 133 | self.err_mm = tf.placeholder( tf.float32, name="error_mm" ) 134 | self.err_mm_summary = tf.summary.scalar( "loss/error_mm", self.err_mm ) 135 | 136 | # Gradients and update operation for training the model. 137 | opt = tf.train.AdamOptimizer( self.learning_rate ) 138 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 139 | 140 | with tf.control_dependencies(update_ops): 141 | 142 | # Update all the trainable parameters 143 | gradients = opt.compute_gradients(self.loss) 144 | self.gradients = [[] if i==None else i for i in gradients] 145 | self.updates = opt.apply_gradients(gradients, global_step=self.global_step) 146 | 147 | # Keep track of the learning rate 148 | self.learning_rate_summary = tf.summary.scalar('learning_rate/learning_rate', self.learning_rate) 149 | 150 | # To save the model 151 | self.saver = tf.train.Saver( tf.global_variables(), max_to_keep=10 ) 152 | 153 | 154 | def two_linear( self, xin, linear_size, residual, dropout_keep_prob, max_norm, batch_norm, dtype, idx ): 155 | """ 156 | Make a bi-linear block with optional residual connection 157 | 158 | Args 159 | xin: the batch that enters the block 160 | linear_size: integer. The size of the linear units 161 | residual: boolean. Whether to add a residual connection 162 | dropout_keep_prob: float [0,1]. Probability of dropping something out 163 | max_norm: boolean. Whether to clip weights to 1-norm 164 | batch_norm: boolean. Whether to do batch normalization 165 | dtype: type of the weigths. Usually tf.float32 166 | idx: integer. 
Layer number (used for naming/scoping) 167 | Returns 168 | y: the batch after it leaves the block 169 | """ 170 | 171 | with vs.variable_scope( "two_linear_"+str(idx) ) as scope: 172 | 173 | input_size = int(xin.get_shape()[1]) 174 | 175 | # Linear 1 176 | w2 = tf.get_variable( name="w2_"+str(idx), initializer=kaiming, shape=[input_size, linear_size], dtype=dtype) 177 | b2 = tf.get_variable( name="b2_"+str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype) 178 | w2 = tf.clip_by_norm(w2,1) if max_norm else w2 179 | y = tf.matmul(xin, w2) + b2 180 | if batch_norm: 181 | y = tf.layers.batch_normalization(y,training=self.isTraining,name="batch_normalization1"+str(idx)) 182 | 183 | y = tf.nn.relu( y ) 184 | y = tf.nn.dropout( y, dropout_keep_prob ) 185 | 186 | # Linear 2 187 | w3 = tf.get_variable( name="w3_"+str(idx), initializer=kaiming, shape=[linear_size, linear_size], dtype=dtype) 188 | b3 = tf.get_variable( name="b3_"+str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype) 189 | w3 = tf.clip_by_norm(w3,1) if max_norm else w3 190 | y = tf.matmul(y, w3) + b3 191 | 192 | if batch_norm: 193 | y = tf.layers.batch_normalization(y,training=self.isTraining,name="batch_normalization2"+str(idx)) 194 | 195 | y = tf.nn.relu( y ) 196 | y = tf.nn.dropout( y, dropout_keep_prob ) 197 | 198 | # Residual every 2 blocks 199 | y = (xin + y) if residual else y 200 | 201 | return y 202 | 203 | def step(self, session, encoder_inputs, decoder_outputs, dropout_keep_prob, isTraining=True): 204 | """Run a step of the model feeding the given inputs. 205 | 206 | Args 207 | session: tensorflow session to use 208 | encoder_inputs: list of numpy vectors to feed as encoder inputs 209 | decoder_outputs: list of numpy vectors that are the expected decoder outputs 210 | dropout_keep_prob: (0,1] dropout keep probability 211 | isTraining: whether to do the backward step or only forward 212 | 213 | Returns 214 | if isTraining is True, a 4-tuple 215 | loss: the computed loss of this batch 216 | loss_summary: tf summary of this batch loss, to log on tensorboard 217 | learning_rate_summary: tf summary of learning rate to log on tensorboard 218 | outputs: predicted 3d poses 219 | if isTraining is False, a 3-tuple 220 | (loss, loss_summary, outputs) same as above 221 | """ 222 | 223 | input_feed = {self.encoder_inputs: encoder_inputs, 224 | self.decoder_outputs: decoder_outputs, 225 | self.isTraining: isTraining, 226 | self.dropout_keep_prob: dropout_keep_prob} 227 | 228 | # Output feed: depends on whether we do a backward step or not. 229 | if isTraining: 230 | output_feed = [self.updates, # Update Op that applies the Adam gradients 231 | self.loss, 232 | self.loss_summary, 233 | self.learning_rate_summary, 234 | self.outputs] 235 | 236 | outputs = session.run( output_feed, input_feed ) 237 | return outputs[1], outputs[2], outputs[3], outputs[4] 238 | 239 | else: 240 | output_feed = [self.loss, # Loss for this batch. 241 | self.loss_summary, 242 | self.outputs] 243 | 244 | outputs = session.run(output_feed, input_feed) 245 | return outputs[0], outputs[1], outputs[2] # No gradient norm 246 | 247 | def get_all_batches( self, data_x, data_y, camera_frame, training=True ): 248 | """ 249 | Obtain a list of all the batches, randomly permuted 250 | Args 251 | data_x: dictionary with 2d inputs 252 | data_y: dictionary with 3d expected outputs 253 | camera_frame: whether the 3d data is in camera coordinates 254 | training: True if this is a training batch. False otherwise.
255 | 256 | Returns 257 | encoder_inputs: list of 2d batches 258 | decoder_outputs: list of 3d batches 259 | """ 260 | 261 | # Figure out how many frames we have 262 | n = 0 263 | for key2d in data_x.keys(): 264 | n2d, _ = data_x[ key2d ].shape 265 | n = n + n2d 266 | 267 | encoder_inputs = np.zeros((n, self.input_size), dtype=float) 268 | decoder_outputs = np.zeros((n, self.output_size), dtype=float) 269 | 270 | # Put all the data into big arrays 271 | idx = 0 272 | for key2d in data_x.keys(): 273 | (subj, b, fname) = key2d 274 | # keys should be the same if 3d is in camera coordinates 275 | key3d = key2d if (camera_frame) else (subj, b, '{0}.h5'.format(fname.split('.')[0])) 276 | key3d = (subj, b, fname[:-3]) if fname.endswith('-sh') and camera_frame else key3d 277 | 278 | n2d, _ = data_x[ key2d ].shape 279 | encoder_inputs[idx:idx+n2d, :] = data_x[ key2d ] 280 | decoder_outputs[idx:idx+n2d, :] = data_y[ key3d ] 281 | idx = idx + n2d 282 | 283 | 284 | if training: 285 | # Randomly permute everything 286 | idx = np.random.permutation( n ) 287 | encoder_inputs = encoder_inputs[idx, :] 288 | decoder_outputs = decoder_outputs[idx, :] 289 | 290 | # Make the number of examples a multiple of the batch size 291 | n_extra = n % self.batch_size 292 | if n_extra > 0: # Otherwise examples are already a multiple of batch size 293 | encoder_inputs = encoder_inputs[:-n_extra, :] 294 | decoder_outputs = decoder_outputs[:-n_extra, :] 295 | 296 | n_batches = n // self.batch_size 297 | encoder_inputs = np.split( encoder_inputs, n_batches ) 298 | decoder_outputs = np.split( decoder_outputs, n_batches ) 299 | 300 | return encoder_inputs, decoder_outputs 301 | -------------------------------------------------------------------------------- /components/FCRN_DepthPrediction_vmd/tensorflow/models/network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | # ---------------------------------------------------------------------------------- 5 | # Commonly used layers and operations based on ethereon's implementation 6 | # https://github.com/ethereon/caffe-tensorflow 7 | # Slight modifications may apply. FCRN-specific operations have also been appended. 8 | # ---------------------------------------------------------------------------------- 9 | # Thanks to *Helisa Dhamo* for the model conversion and integration into TensorFlow. 10 | # ---------------------------------------------------------------------------------- 11 | 12 | DEFAULT_PADDING = 'SAME' 13 | 14 | 15 | def get_incoming_shape(incoming): 16 | """ Returns the incoming data shape """ 17 | if isinstance(incoming, tf.Tensor): 18 | return incoming.get_shape().as_list() 19 | elif type(incoming) in [np.array, list, tuple]: 20 | return np.shape(incoming) 21 | else: 22 | raise Exception("Invalid incoming layer.") 23 | 24 | 25 | def interleave(tensors, axis): 26 | old_shape = get_incoming_shape(tensors[0])[1:] 27 | new_shape = [-1] + old_shape 28 | new_shape[axis] *= len(tensors) 29 | return tf.reshape(tf.stack(tensors, axis + 1), new_shape) 30 | 31 | def layer(op): 32 | '''Decorator for composable network layers.''' 33 | 34 | def layer_decorated(self, *args, **kwargs): 35 | # Automatically set a name if not provided. 36 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 37 | 38 | # Figure out the layer inputs. 39 | if len(self.terminals) == 0: 40 | raise RuntimeError('No input variables found for layer %s.' 
% name) 41 | elif len(self.terminals) == 1: 42 | layer_input = self.terminals[0] 43 | else: 44 | layer_input = list(self.terminals) 45 | # Perform the operation and get the output. 46 | layer_output = op(self, layer_input, *args, **kwargs) 47 | # Add to layer LUT. 48 | self.layers[name] = layer_output 49 | # This output is now the input for the next layer. 50 | self.feed(layer_output) 51 | # Return self for chained calls. 52 | return self 53 | 54 | return layer_decorated 55 | 56 | 57 | class Network(object): 58 | 59 | def __init__(self, inputs, batch, keep_prob, is_training, trainable = True): 60 | # The input nodes for this network 61 | self.inputs = inputs 62 | # The current list of terminal nodes 63 | self.terminals = [] 64 | # Mapping from layer names to layers 65 | self.layers = dict(inputs) 66 | # If true, the resulting variables are set as trainable 67 | self.trainable = trainable 68 | self.batch_size = batch 69 | self.keep_prob = keep_prob 70 | self.is_training = is_training 71 | self.setup() 72 | 73 | 74 | def setup(self): 75 | '''Construct the network. ''' 76 | raise NotImplementedError('Must be implemented by the subclass.') 77 | 78 | def load(self, data_path, session, ignore_missing=False): 79 | '''Load network weights. 80 | data_path: The path to the numpy-serialized network weights 81 | session: The current TensorFlow session 82 | ignore_missing: If true, serialized weights for missing layers are ignored. 83 | ''' 84 | data_dict = np.load(data_path, encoding='latin1').item() 85 | for op_name in data_dict: 86 | with tf.variable_scope(op_name, reuse=True): 87 | for param_name, data in iter(data_dict[op_name].items()): 88 | try: 89 | var = tf.get_variable(param_name) 90 | session.run(var.assign(data)) 91 | 92 | except ValueError: 93 | if not ignore_missing: 94 | raise 95 | 96 | def feed(self, *args): 97 | '''Set the input(s) for the next operation by replacing the terminal nodes. 98 | The arguments can be either layer names or the actual layers. 99 | ''' 100 | assert len(args) != 0 101 | self.terminals = [] 102 | for fed_layer in args: 103 | if isinstance(fed_layer, str): 104 | try: 105 | fed_layer = self.layers[fed_layer] 106 | except KeyError: 107 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 108 | self.terminals.append(fed_layer) 109 | return self 110 | 111 | def get_output(self): 112 | '''Returns the current network output.''' 113 | return self.terminals[-1] 114 | 115 | def get_layer_output(self, name): 116 | return self.layers[name] 117 | 118 | def get_unique_name(self, prefix): 119 | '''Returns an index-suffixed unique name for the given prefix. 120 | This is used for auto-generating layer names based on the type-prefix. 
121 | ''' 122 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 123 | return '%s_%d' % (prefix, ident) 124 | 125 | def make_var(self, name, shape): 126 | '''Creates a new TensorFlow variable.''' 127 | return tf.get_variable(name, shape, dtype = 'float32', trainable=self.trainable) 128 | 129 | def validate_padding(self, padding): 130 | '''Verifies that the padding is one of the supported ones.''' 131 | assert padding in ('SAME', 'VALID') 132 | 133 | @layer 134 | def conv(self, 135 | input_data, 136 | k_h, 137 | k_w, 138 | c_o, 139 | s_h, 140 | s_w, 141 | name, 142 | relu=True, 143 | padding=DEFAULT_PADDING, 144 | group=1, 145 | biased=True): 146 | 147 | # Verify that the padding is acceptable 148 | self.validate_padding(padding) 149 | # Get the number of channels in the input 150 | c_i = input_data.get_shape()[-1] 151 | 152 | if (padding == 'SAME'): 153 | input_data = tf.pad(input_data, [[0, 0], [(k_h - 1)//2, (k_h - 1)//2], [(k_w - 1)//2, (k_w - 1)//2], [0, 0]], "CONSTANT") 154 | 155 | # Verify that the grouping parameter is valid 156 | assert c_i % group == 0 157 | assert c_o % group == 0 158 | # Convolution for a given input and kernel 159 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding='VALID') 160 | 161 | with tf.variable_scope(name) as scope: 162 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 163 | 164 | if group == 1: 165 | # This is the common-case. Convolve the input without any further complications. 166 | output = convolve(input_data, kernel) 167 | else: 168 | # Split the input into groups and then convolve each of them independently 169 | 170 | input_groups = tf.split(3, group, input_data) 171 | kernel_groups = tf.split(3, group, kernel) 172 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 173 | # Concatenate the groups 174 | output = tf.concat(3, output_groups) 175 | 176 | # Add the biases 177 | if biased: 178 | biases = self.make_var('biases', [c_o]) 179 | output = tf.nn.bias_add(output, biases) 180 | if relu: 181 | # ReLU non-linearity 182 | output = tf.nn.relu(output, name=scope.name) 183 | 184 | return output 185 | 186 | @layer 187 | def relu(self, input_data, name): 188 | return tf.nn.relu(input_data, name=name) 189 | 190 | @layer 191 | def max_pool(self, input_data, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 192 | self.validate_padding(padding) 193 | return tf.nn.max_pool(input_data, 194 | ksize=[1, k_h, k_w, 1], 195 | strides=[1, s_h, s_w, 1], 196 | padding=padding, 197 | name=name) 198 | 199 | @layer 200 | def avg_pool(self, input_data, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 201 | self.validate_padding(padding) 202 | return tf.nn.avg_pool(input_data, 203 | ksize=[1, k_h, k_w, 1], 204 | strides=[1, s_h, s_w, 1], 205 | padding=padding, 206 | name=name) 207 | 208 | @layer 209 | def lrn(self, input_data, radius, alpha, beta, name, bias=1.0): 210 | return tf.nn.local_response_normalization(input_data, 211 | depth_radius=radius, 212 | alpha=alpha, 213 | beta=beta, 214 | bias=bias, 215 | name=name) 216 | 217 | @layer 218 | def concat(self, inputs, axis, name): 219 | return tf.concat(concat_dim=axis, values=inputs, name=name) 220 | 221 | @layer 222 | def add(self, inputs, name): 223 | return tf.add_n(inputs, name=name) 224 | 225 | @layer 226 | def fc(self, input_data, num_out, name, relu=True): 227 | with tf.variable_scope(name) as scope: 228 | input_shape = input_data.get_shape() 229 | if input_shape.ndims == 4: 230 | # The input is spatial. 
Vectorize it first. 231 | dim = 1 232 | for d in input_shape[1:].as_list(): 233 | dim *= d 234 | feed_in = tf.reshape(input_data, [-1, dim]) 235 | else: 236 | feed_in, dim = (input_data, input_shape[-1].value) 237 | weights = self.make_var('weights', shape=[dim, num_out]) 238 | biases = self.make_var('biases', [num_out]) 239 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 240 | fc = op(feed_in, weights, biases, name=scope.name) 241 | return fc 242 | 243 | @layer 244 | def softmax(self, input_data, name): 245 | input_shape = map(lambda v: v.value, input_data.get_shape()) 246 | if len(input_shape) > 2: 247 | # For certain models (like NiN), the singleton spatial dimensions 248 | # need to be explicitly squeezed, since they're not broadcast-able 249 | # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). 250 | if input_shape[1] == 1 and input_shape[2] == 1: 251 | input_data = tf.squeeze(input_data, squeeze_dims=[1, 2]) 252 | else: 253 | raise ValueError('Rank 2 tensor input expected for softmax!') 254 | return tf.nn.softmax(input_data, name) 255 | 256 | @layer 257 | def batch_normalization(self, input_data, name, scale_offset=True, relu=False): 258 | 259 | with tf.variable_scope(name) as scope: 260 | shape = [input_data.get_shape()[-1]] 261 | pop_mean = tf.get_variable("mean", shape, initializer = tf.constant_initializer(0.0), trainable=False) 262 | pop_var = tf.get_variable("variance", shape, initializer = tf.constant_initializer(1.0), trainable=False) 263 | epsilon = 1e-4 264 | decay = 0.999 265 | if scale_offset: 266 | scale = tf.get_variable("scale", shape, initializer = tf.constant_initializer(1.0)) 267 | offset = tf.get_variable("offset", shape, initializer = tf.constant_initializer(0.0)) 268 | else: 269 | scale, offset = (None, None) 270 | if self.is_training: 271 | batch_mean, batch_var = tf.nn.moments(input_data, [0, 1, 2]) 272 | 273 | train_mean = tf.assign(pop_mean, 274 | pop_mean * decay + batch_mean * (1 - decay)) 275 | train_var = tf.assign(pop_var, 276 | pop_var * decay + batch_var * (1 - decay)) 277 | with tf.control_dependencies([train_mean, train_var]): 278 | output = tf.nn.batch_normalization(input_data, 279 | batch_mean, batch_var, offset, scale, epsilon, name = name) 280 | else: 281 | output = tf.nn.batch_normalization(input_data, 282 | pop_mean, pop_var, offset, scale, epsilon, name = name) 283 | 284 | if relu: 285 | output = tf.nn.relu(output) 286 | 287 | return output 288 | 289 | @layer 290 | def dropout(self, input_data, keep_prob, name): 291 | return tf.nn.dropout(input_data, keep_prob, name=name) 292 | 293 | 294 | def unpool_as_conv(self, size, input_data, id, stride = 1, ReLU = False, BN = True): 295 | 296 | # Model upconvolutions (unpooling + convolution) as interleaving feature 297 | # maps of four convolutions (A,B,C,D). Building block for up-projections. 
298 | 299 | 300 | # Convolution A (3x3) 301 | # -------------------------------------------------- 302 | layerName = "layer%s_ConvA" % (id) 303 | self.feed(input_data) 304 | self.conv( 3, 3, size[3], stride, stride, name = layerName, padding = 'SAME', relu = False) 305 | outputA = self.get_output() 306 | 307 | # Convolution B (2x3) 308 | # -------------------------------------------------- 309 | layerName = "layer%s_ConvB" % (id) 310 | padded_input_B = tf.pad(input_data, [[0, 0], [1, 0], [1, 1], [0, 0]], "CONSTANT") 311 | self.feed(padded_input_B) 312 | self.conv(2, 3, size[3], stride, stride, name = layerName, padding = 'VALID', relu = False) 313 | outputB = self.get_output() 314 | 315 | # Convolution C (3x2) 316 | # -------------------------------------------------- 317 | layerName = "layer%s_ConvC" % (id) 318 | padded_input_C = tf.pad(input_data, [[0, 0], [1, 1], [1, 0], [0, 0]], "CONSTANT") 319 | self.feed(padded_input_C) 320 | self.conv(3, 2, size[3], stride, stride, name = layerName, padding = 'VALID', relu = False) 321 | outputC = self.get_output() 322 | 323 | # Convolution D (2x2) 324 | # -------------------------------------------------- 325 | layerName = "layer%s_ConvD" % (id) 326 | padded_input_D = tf.pad(input_data, [[0, 0], [1, 0], [1, 0], [0, 0]], "CONSTANT") 327 | self.feed(padded_input_D) 328 | self.conv(2, 2, size[3], stride, stride, name = layerName, padding = 'VALID', relu = False) 329 | outputD = self.get_output() 330 | 331 | # Interleaving elements of the four feature maps 332 | # -------------------------------------------------- 333 | left = interleave([outputA, outputB], axis=1) # columns 334 | right = interleave([outputC, outputD], axis=1) # columns 335 | Y = interleave([left, right], axis=2) # rows 336 | 337 | if BN: 338 | layerName = "layer%s_BN" % (id) 339 | self.feed(Y) 340 | self.batch_normalization(name = layerName, scale_offset = True, relu = False) 341 | Y = self.get_output() 342 | 343 | if ReLU: 344 | Y = tf.nn.relu(Y, name = layerName) 345 | 346 | return Y 347 | 348 | 349 | def up_project(self, size, id, stride = 1, BN = True): 350 | 351 | # Create residual upsampling layer (UpProjection) 352 | 353 | input_data = self.get_output() 354 | 355 | # Branch 1 356 | id_br1 = "%s_br1" % (id) 357 | 358 | # Interleaving Convs of 1st branch 359 | out = self.unpool_as_conv(size, input_data, id_br1, stride, ReLU=True, BN=True) 360 | 361 | # Convolution following the upProjection on the 1st branch 362 | layerName = "layer%s_Conv" % (id) 363 | self.feed(out) 364 | self.conv(size[0], size[1], size[3], stride, stride, name = layerName, relu = False) 365 | 366 | if BN: 367 | layerName = "layer%s_BN" % (id) 368 | self.batch_normalization(name = layerName, scale_offset=True, relu = False) 369 | 370 | # Output of 1st branch 371 | branch1_output = self.get_output() 372 | 373 | 374 | # Branch 2 375 | id_br2 = "%s_br2" % (id) 376 | # Interleaving convolutions and output of 2nd branch 377 | branch2_output = self.unpool_as_conv(size, input_data, id_br2, stride, ReLU=False) 378 | 379 | 380 | # sum branches 381 | layerName = "layer%s_Sum" % (id) 382 | output = tf.add_n([branch1_output, branch2_output], name = layerName) 383 | # ReLU 384 | layerName = "layer%s_ReLU" % (id) 385 | output = tf.nn.relu(output, name=layerName) 386 | 387 | self.feed(output) 388 | return self 389 | -------------------------------------------------------------------------------- /components/Three_D_PoseBaseline_multi/packages/lifting/utils/cpm.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | TODO: Almost all variables in this file violate PEP 8 naming conventions 3 | """ 4 | 5 | import tensorflow as tf 6 | import tensorflow.contrib.layers as layers 7 | 8 | __all__ = [ 9 | 'inference_person', 10 | 'inference_pose' 11 | ] 12 | 13 | 14 | def inference_person(image): 15 | with tf.variable_scope('PersonNet'): 16 | conv1_1 = layers.conv2d( 17 | image, 64, 3, 1, activation_fn=None, scope='conv1_1') 18 | conv1_1 = tf.nn.relu(conv1_1) 19 | conv1_2 = layers.conv2d( 20 | conv1_1, 64, 3, 1, activation_fn=None, scope='conv1_2') 21 | conv1_2 = tf.nn.relu(conv1_2) 22 | pool1_stage1 = layers.max_pool2d(conv1_2, 2, 2) 23 | conv2_1 = layers.conv2d(pool1_stage1, 128, 3, 1, 24 | activation_fn=None, scope='conv2_1') 25 | conv2_1 = tf.nn.relu(conv2_1) 26 | conv2_2 = layers.conv2d( 27 | conv2_1, 128, 3, 1, activation_fn=None, scope='conv2_2') 28 | conv2_2 = tf.nn.relu(conv2_2) 29 | pool2_stage1 = layers.max_pool2d(conv2_2, 2, 2) 30 | conv3_1 = layers.conv2d(pool2_stage1, 256, 3, 1, 31 | activation_fn=None, scope='conv3_1') 32 | conv3_1 = tf.nn.relu(conv3_1) 33 | conv3_2 = layers.conv2d( 34 | conv3_1, 256, 3, 1, activation_fn=None, scope='conv3_2') 35 | conv3_2 = tf.nn.relu(conv3_2) 36 | conv3_3 = layers.conv2d( 37 | conv3_2, 256, 3, 1, activation_fn=None, scope='conv3_3') 38 | conv3_3 = tf.nn.relu(conv3_3) 39 | conv3_4 = layers.conv2d( 40 | conv3_3, 256, 3, 1, activation_fn=None, scope='conv3_4') 41 | conv3_4 = tf.nn.relu(conv3_4) 42 | pool3_stage1 = layers.max_pool2d(conv3_4, 2, 2) 43 | conv4_1 = layers.conv2d(pool3_stage1, 512, 3, 1, 44 | activation_fn=None, scope='conv4_1') 45 | conv4_1 = tf.nn.relu(conv4_1) 46 | conv4_2 = layers.conv2d( 47 | conv4_1, 512, 3, 1, activation_fn=None, scope='conv4_2') 48 | conv4_2 = tf.nn.relu(conv4_2) 49 | conv4_3 = layers.conv2d( 50 | conv4_2, 512, 3, 1, activation_fn=None, scope='conv4_3') 51 | conv4_3 = tf.nn.relu(conv4_3) 52 | conv4_4 = layers.conv2d( 53 | conv4_3, 512, 3, 1, activation_fn=None, scope='conv4_4') 54 | conv4_4 = tf.nn.relu(conv4_4) 55 | conv5_1 = layers.conv2d( 56 | conv4_4, 512, 3, 1, activation_fn=None, scope='conv5_1') 57 | conv5_1 = tf.nn.relu(conv5_1) 58 | conv5_2_CPM = layers.conv2d( 59 | conv5_1, 128, 3, 1, activation_fn=None, scope='conv5_2_CPM') 60 | conv5_2_CPM = tf.nn.relu(conv5_2_CPM) 61 | conv6_1_CPM = layers.conv2d( 62 | conv5_2_CPM, 512, 1, 1, activation_fn=None, scope='conv6_1_CPM') 63 | conv6_1_CPM = tf.nn.relu(conv6_1_CPM) 64 | conv6_2_CPM = layers.conv2d( 65 | conv6_1_CPM, 1, 1, 1, activation_fn=None, scope='conv6_2_CPM') 66 | concat_stage2 = tf.concat([conv6_2_CPM, conv5_2_CPM], 3) 67 | Mconv1_stage2 = layers.conv2d( 68 | concat_stage2, 128, 7, 1, activation_fn=None, 69 | scope='Mconv1_stage2') 70 | Mconv1_stage2 = tf.nn.relu(Mconv1_stage2) 71 | Mconv2_stage2 = layers.conv2d( 72 | Mconv1_stage2, 128, 7, 1, activation_fn=None, 73 | scope='Mconv2_stage2') 74 | Mconv2_stage2 = tf.nn.relu(Mconv2_stage2) 75 | Mconv3_stage2 = layers.conv2d( 76 | Mconv2_stage2, 128, 7, 1, activation_fn=None, 77 | scope='Mconv3_stage2') 78 | Mconv3_stage2 = tf.nn.relu(Mconv3_stage2) 79 | Mconv4_stage2 = layers.conv2d( 80 | Mconv3_stage2, 128, 7, 1, activation_fn=None, 81 | scope='Mconv4_stage2') 82 | Mconv4_stage2 = tf.nn.relu(Mconv4_stage2) 83 | Mconv5_stage2 = layers.conv2d( 84 | Mconv4_stage2, 128, 7, 1, activation_fn=None, 85 | scope='Mconv5_stage2') 86 | Mconv5_stage2 = tf.nn.relu(Mconv5_stage2) 87 | Mconv6_stage2 = layers.conv2d( 88 | Mconv5_stage2, 128, 
1, 1, activation_fn=None, 89 | scope='Mconv6_stage2') 90 | Mconv6_stage2 = tf.nn.relu(Mconv6_stage2) 91 | Mconv7_stage2 = layers.conv2d( 92 | Mconv6_stage2, 1, 1, 1, activation_fn=None, scope='Mconv7_stage2') 93 | concat_stage3 = tf.concat([Mconv7_stage2, conv5_2_CPM], 3) 94 | Mconv1_stage3 = layers.conv2d( 95 | concat_stage3, 128, 7, 1, activation_fn=None, 96 | scope='Mconv1_stage3') 97 | Mconv1_stage3 = tf.nn.relu(Mconv1_stage3) 98 | Mconv2_stage3 = layers.conv2d( 99 | Mconv1_stage3, 128, 7, 1, activation_fn=None, 100 | scope='Mconv2_stage3') 101 | Mconv2_stage3 = tf.nn.relu(Mconv2_stage3) 102 | Mconv3_stage3 = layers.conv2d( 103 | Mconv2_stage3, 128, 7, 1, activation_fn=None, 104 | scope='Mconv3_stage3') 105 | Mconv3_stage3 = tf.nn.relu(Mconv3_stage3) 106 | Mconv4_stage3 = layers.conv2d( 107 | Mconv3_stage3, 128, 7, 1, activation_fn=None, 108 | scope='Mconv4_stage3') 109 | Mconv4_stage3 = tf.nn.relu(Mconv4_stage3) 110 | Mconv5_stage3 = layers.conv2d( 111 | Mconv4_stage3, 128, 7, 1, activation_fn=None, 112 | scope='Mconv5_stage3') 113 | Mconv5_stage3 = tf.nn.relu(Mconv5_stage3) 114 | Mconv6_stage3 = layers.conv2d( 115 | Mconv5_stage3, 128, 1, 1, activation_fn=None, 116 | scope='Mconv6_stage3') 117 | Mconv6_stage3 = tf.nn.relu(Mconv6_stage3) 118 | Mconv7_stage3 = layers.conv2d( 119 | Mconv6_stage3, 1, 1, 1, activation_fn=None, 120 | scope='Mconv7_stage3') 121 | concat_stage4 = tf.concat([Mconv7_stage3, conv5_2_CPM], 3) 122 | Mconv1_stage4 = layers.conv2d( 123 | concat_stage4, 128, 7, 1, activation_fn=None, 124 | scope='Mconv1_stage4') 125 | Mconv1_stage4 = tf.nn.relu(Mconv1_stage4) 126 | Mconv2_stage4 = layers.conv2d( 127 | Mconv1_stage4, 128, 7, 1, activation_fn=None, 128 | scope='Mconv2_stage4') 129 | Mconv2_stage4 = tf.nn.relu(Mconv2_stage4) 130 | Mconv3_stage4 = layers.conv2d( 131 | Mconv2_stage4, 128, 7, 1, activation_fn=None, 132 | scope='Mconv3_stage4') 133 | Mconv3_stage4 = tf.nn.relu(Mconv3_stage4) 134 | Mconv4_stage4 = layers.conv2d( 135 | Mconv3_stage4, 128, 7, 1, activation_fn=None, 136 | scope='Mconv4_stage4') 137 | Mconv4_stage4 = tf.nn.relu(Mconv4_stage4) 138 | Mconv5_stage4 = layers.conv2d( 139 | Mconv4_stage4, 128, 7, 1, activation_fn=None, 140 | scope='Mconv5_stage4') 141 | Mconv5_stage4 = tf.nn.relu(Mconv5_stage4) 142 | Mconv6_stage4 = layers.conv2d( 143 | Mconv5_stage4, 128, 1, 1, activation_fn=None, 144 | scope='Mconv6_stage4') 145 | Mconv6_stage4 = tf.nn.relu(Mconv6_stage4) 146 | Mconv7_stage4 = layers.conv2d( 147 | Mconv6_stage4, 1, 1, 1, activation_fn=None, scope='Mconv7_stage4') 148 | return Mconv7_stage4 149 | 150 | 151 | def inference_pose(image, center_map): 152 | with tf.variable_scope('PoseNet'): 153 | pool_center_lower = layers.avg_pool2d(center_map, 9, 8, padding='SAME') 154 | conv1_1 = layers.conv2d( 155 | image, 64, 3, 1, activation_fn=None, scope='conv1_1') 156 | conv1_1 = tf.nn.relu(conv1_1) 157 | conv1_2 = layers.conv2d( 158 | conv1_1, 64, 3, 1, activation_fn=None, scope='conv1_2') 159 | conv1_2 = tf.nn.relu(conv1_2) 160 | pool1_stage1 = layers.max_pool2d(conv1_2, 2, 2) 161 | conv2_1 = layers.conv2d(pool1_stage1, 128, 3, 1, 162 | activation_fn=None, scope='conv2_1') 163 | conv2_1 = tf.nn.relu(conv2_1) 164 | conv2_2 = layers.conv2d( 165 | conv2_1, 128, 3, 1, activation_fn=None, scope='conv2_2') 166 | conv2_2 = tf.nn.relu(conv2_2) 167 | pool2_stage1 = layers.max_pool2d(conv2_2, 2, 2) 168 | conv3_1 = layers.conv2d(pool2_stage1, 256, 3, 1, 169 | activation_fn=None, scope='conv3_1') 170 | conv3_1 = tf.nn.relu(conv3_1) 171 | conv3_2 = layers.conv2d( 172 | 
conv3_1, 256, 3, 1, activation_fn=None, scope='conv3_2') 173 | conv3_2 = tf.nn.relu(conv3_2) 174 | conv3_3 = layers.conv2d( 175 | conv3_2, 256, 3, 1, activation_fn=None, scope='conv3_3') 176 | conv3_3 = tf.nn.relu(conv3_3) 177 | conv3_4 = layers.conv2d( 178 | conv3_3, 256, 3, 1, activation_fn=None, scope='conv3_4') 179 | conv3_4 = tf.nn.relu(conv3_4) 180 | pool3_stage1 = layers.max_pool2d(conv3_4, 2, 2) 181 | conv4_1 = layers.conv2d(pool3_stage1, 512, 3, 1, 182 | activation_fn=None, scope='conv4_1') 183 | conv4_1 = tf.nn.relu(conv4_1) 184 | conv4_2 = layers.conv2d( 185 | conv4_1, 512, 3, 1, activation_fn=None, scope='conv4_2') 186 | conv4_2 = tf.nn.relu(conv4_2) 187 | conv4_3_CPM = layers.conv2d( 188 | conv4_2, 256, 3, 1, activation_fn=None, scope='conv4_3_CPM') 189 | conv4_3_CPM = tf.nn.relu(conv4_3_CPM) 190 | conv4_4_CPM = layers.conv2d( 191 | conv4_3_CPM, 256, 3, 1, activation_fn=None, scope='conv4_4_CPM') 192 | conv4_4_CPM = tf.nn.relu(conv4_4_CPM) 193 | conv4_5_CPM = layers.conv2d( 194 | conv4_4_CPM, 256, 3, 1, activation_fn=None, scope='conv4_5_CPM') 195 | conv4_5_CPM = tf.nn.relu(conv4_5_CPM) 196 | conv4_6_CPM = layers.conv2d( 197 | conv4_5_CPM, 256, 3, 1, activation_fn=None, scope='conv4_6_CPM') 198 | conv4_6_CPM = tf.nn.relu(conv4_6_CPM) 199 | conv4_7_CPM = layers.conv2d( 200 | conv4_6_CPM, 128, 3, 1, activation_fn=None, scope='conv4_7_CPM') 201 | conv4_7_CPM = tf.nn.relu(conv4_7_CPM) 202 | conv5_1_CPM = layers.conv2d( 203 | conv4_7_CPM, 512, 1, 1, activation_fn=None, scope='conv5_1_CPM') 204 | conv5_1_CPM = tf.nn.relu(conv5_1_CPM) 205 | conv5_2_CPM = layers.conv2d( 206 | conv5_1_CPM, 15, 1, 1, activation_fn=None, scope='conv5_2_CPM') 207 | concat_stage2 = tf.concat( 208 | [conv5_2_CPM, conv4_7_CPM, pool_center_lower], 3) 209 | Mconv1_stage2 = layers.conv2d( 210 | concat_stage2, 128, 7, 1, activation_fn=None, 211 | scope='Mconv1_stage2') 212 | Mconv1_stage2 = tf.nn.relu(Mconv1_stage2) 213 | Mconv2_stage2 = layers.conv2d( 214 | Mconv1_stage2, 128, 7, 1, activation_fn=None, 215 | scope='Mconv2_stage2') 216 | Mconv2_stage2 = tf.nn.relu(Mconv2_stage2) 217 | Mconv3_stage2 = layers.conv2d( 218 | Mconv2_stage2, 128, 7, 1, activation_fn=None, 219 | scope='Mconv3_stage2') 220 | Mconv3_stage2 = tf.nn.relu(Mconv3_stage2) 221 | Mconv4_stage2 = layers.conv2d( 222 | Mconv3_stage2, 128, 7, 1, activation_fn=None, 223 | scope='Mconv4_stage2') 224 | Mconv4_stage2 = tf.nn.relu(Mconv4_stage2) 225 | Mconv5_stage2 = layers.conv2d( 226 | Mconv4_stage2, 128, 7, 1, activation_fn=None, 227 | scope='Mconv5_stage2') 228 | Mconv5_stage2 = tf.nn.relu(Mconv5_stage2) 229 | Mconv6_stage2 = layers.conv2d( 230 | Mconv5_stage2, 128, 1, 1, activation_fn=None, 231 | scope='Mconv6_stage2') 232 | Mconv6_stage2 = tf.nn.relu(Mconv6_stage2) 233 | Mconv7_stage2 = layers.conv2d( 234 | Mconv6_stage2, 15, 1, 1, activation_fn=None, scope='Mconv7_stage2') 235 | concat_stage3 = tf.concat( 236 | [Mconv7_stage2, conv4_7_CPM, pool_center_lower], 3) 237 | Mconv1_stage3 = layers.conv2d( 238 | concat_stage3, 128, 7, 1, activation_fn=None, 239 | scope='Mconv1_stage3') 240 | Mconv1_stage3 = tf.nn.relu(Mconv1_stage3) 241 | Mconv2_stage3 = layers.conv2d( 242 | Mconv1_stage3, 128, 7, 1, activation_fn=None, 243 | scope='Mconv2_stage3') 244 | Mconv2_stage3 = tf.nn.relu(Mconv2_stage3) 245 | Mconv3_stage3 = layers.conv2d( 246 | Mconv2_stage3, 128, 7, 1, activation_fn=None, 247 | scope='Mconv3_stage3') 248 | Mconv3_stage3 = tf.nn.relu(Mconv3_stage3) 249 | Mconv4_stage3 = layers.conv2d( 250 | Mconv3_stage3, 128, 7, 1, activation_fn=None, 251 | 
scope='Mconv4_stage3') 252 | Mconv4_stage3 = tf.nn.relu(Mconv4_stage3) 253 | Mconv5_stage3 = layers.conv2d( 254 | Mconv4_stage3, 128, 7, 1, activation_fn=None, 255 | scope='Mconv5_stage3') 256 | Mconv5_stage3 = tf.nn.relu(Mconv5_stage3) 257 | Mconv6_stage3 = layers.conv2d( 258 | Mconv5_stage3, 128, 1, 1, activation_fn=None, 259 | scope='Mconv6_stage3') 260 | Mconv6_stage3 = tf.nn.relu(Mconv6_stage3) 261 | Mconv7_stage3 = layers.conv2d( 262 | Mconv6_stage3, 15, 1, 1, activation_fn=None, scope='Mconv7_stage3') 263 | concat_stage4 = tf.concat( 264 | [Mconv7_stage3, conv4_7_CPM, pool_center_lower], 3) 265 | Mconv1_stage4 = layers.conv2d( 266 | concat_stage4, 128, 7, 1, activation_fn=None, 267 | scope='Mconv1_stage4') 268 | Mconv1_stage4 = tf.nn.relu(Mconv1_stage4) 269 | Mconv2_stage4 = layers.conv2d( 270 | Mconv1_stage4, 128, 7, 1, activation_fn=None, 271 | scope='Mconv2_stage4') 272 | Mconv2_stage4 = tf.nn.relu(Mconv2_stage4) 273 | Mconv3_stage4 = layers.conv2d( 274 | Mconv2_stage4, 128, 7, 1, activation_fn=None, 275 | scope='Mconv3_stage4') 276 | Mconv3_stage4 = tf.nn.relu(Mconv3_stage4) 277 | Mconv4_stage4 = layers.conv2d( 278 | Mconv3_stage4, 128, 7, 1, activation_fn=None, 279 | scope='Mconv4_stage4') 280 | Mconv4_stage4 = tf.nn.relu(Mconv4_stage4) 281 | Mconv5_stage4 = layers.conv2d( 282 | Mconv4_stage4, 128, 7, 1, activation_fn=None, 283 | scope='Mconv5_stage4') 284 | Mconv5_stage4 = tf.nn.relu(Mconv5_stage4) 285 | Mconv6_stage4 = layers.conv2d( 286 | Mconv5_stage4, 128, 1, 1, activation_fn=None, 287 | scope='Mconv6_stage4') 288 | Mconv6_stage4 = tf.nn.relu(Mconv6_stage4) 289 | Mconv7_stage4 = layers.conv2d( 290 | Mconv6_stage4, 15, 1, 1, activation_fn=None, scope='Mconv7_stage4') 291 | concat_stage5 = tf.concat( 292 | [Mconv7_stage4, conv4_7_CPM, pool_center_lower], 3) 293 | Mconv1_stage5 = layers.conv2d( 294 | concat_stage5, 128, 7, 1, activation_fn=None, 295 | scope='Mconv1_stage5') 296 | Mconv1_stage5 = tf.nn.relu(Mconv1_stage5) 297 | Mconv2_stage5 = layers.conv2d( 298 | Mconv1_stage5, 128, 7, 1, activation_fn=None, 299 | scope='Mconv2_stage5') 300 | Mconv2_stage5 = tf.nn.relu(Mconv2_stage5) 301 | Mconv3_stage5 = layers.conv2d( 302 | Mconv2_stage5, 128, 7, 1, activation_fn=None, 303 | scope='Mconv3_stage5') 304 | Mconv3_stage5 = tf.nn.relu(Mconv3_stage5) 305 | Mconv4_stage5 = layers.conv2d( 306 | Mconv3_stage5, 128, 7, 1, activation_fn=None, 307 | scope='Mconv4_stage5') 308 | Mconv4_stage5 = tf.nn.relu(Mconv4_stage5) 309 | Mconv5_stage5 = layers.conv2d( 310 | Mconv4_stage5, 128, 7, 1, activation_fn=None, 311 | scope='Mconv5_stage5') 312 | Mconv5_stage5 = tf.nn.relu(Mconv5_stage5) 313 | Mconv6_stage5 = layers.conv2d( 314 | Mconv5_stage5, 128, 1, 1, activation_fn=None, 315 | scope='Mconv6_stage5') 316 | Mconv6_stage5 = tf.nn.relu(Mconv6_stage5) 317 | Mconv7_stage5 = layers.conv2d( 318 | Mconv6_stage5, 15, 1, 1, activation_fn=None, scope='Mconv7_stage5') 319 | concat_stage6 = tf.concat( 320 | [Mconv7_stage5, conv4_7_CPM, pool_center_lower], 3) 321 | Mconv1_stage6 = layers.conv2d( 322 | concat_stage6, 128, 7, 1, activation_fn=None, 323 | scope='Mconv1_stage6') 324 | Mconv1_stage6 = tf.nn.relu(Mconv1_stage6) 325 | Mconv2_stage6 = layers.conv2d( 326 | Mconv1_stage6, 128, 7, 1, activation_fn=None, 327 | scope='Mconv2_stage6') 328 | Mconv2_stage6 = tf.nn.relu(Mconv2_stage6) 329 | Mconv3_stage6 = layers.conv2d( 330 | Mconv2_stage6, 128, 7, 1, activation_fn=None, 331 | scope='Mconv3_stage6') 332 | Mconv3_stage6 = tf.nn.relu(Mconv3_stage6) 333 | Mconv4_stage6 = layers.conv2d( 334 | 
Mconv3_stage6, 128, 7, 1, activation_fn=None, 335 | scope='Mconv4_stage6') 336 | Mconv4_stage6 = tf.nn.relu(Mconv4_stage6) 337 | Mconv5_stage6 = layers.conv2d( 338 | Mconv4_stage6, 128, 7, 1, activation_fn=None, 339 | scope='Mconv5_stage6') 340 | Mconv5_stage6 = tf.nn.relu(Mconv5_stage6) 341 | Mconv6_stage6 = layers.conv2d( 342 | Mconv5_stage6, 128, 1, 1, activation_fn=None, 343 | scope='Mconv6_stage6') 344 | Mconv6_stage6 = tf.nn.relu(Mconv6_stage6) 345 | Mconv7_stage6 = layers.conv2d( 346 | Mconv6_stage6, 15, 1, 1, activation_fn=None, 347 | scope='Mconv7_stage6') 348 | return Mconv7_stage6 349 | --------------------------------------------------------------------------------
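
Editor's note on using cpm.py: the two builders exported above, inference_person(image) and inference_pose(image, center_map), only construct a TensorFlow 1.x graph. The caller is responsible for cropping and normalising the input image, supplying a person-centre map of the same spatial size, and restoring pretrained weights. The following is a minimal sketch of how the two graphs could be wired together in a session; it is not code taken from this repository. The 368x368 crop size, the import path, and the use of random initialisation in place of a restored checkpoint are assumptions made purely for illustration.

import numpy as np
import tensorflow as tf

# Assumed import path: presumes the directory containing "packages" is on
# sys.path; adjust to match how the lifting package is actually loaded.
from packages.lifting.utils import cpm

graph = tf.Graph()
with graph.as_default():
    # 368x368 is a common CPM crop size; the size used by this repo's callers
    # is not shown here, so treat it as an assumption.
    image_in = tf.placeholder(tf.float32, [1, 368, 368, 3], name='image_in')
    center_map_in = tf.placeholder(tf.float32, [1, 368, 368, 1], name='center_map_in')

    person_heatmap = cpm.inference_person(image_in)               # person-centre heatmap
    pose_heatmaps = cpm.inference_pose(image_in, center_map_in)   # 15 joint-heatmap channels
    init_op = tf.global_variables_initializer()

with tf.Session(graph=graph) as sess:
    # The real pipeline would restore pretrained CPM weights here; random
    # initialisation is used only so this sketch runs end to end.
    sess.run(init_op)
    dummy_image = np.zeros((1, 368, 368, 3), dtype=np.float32)
    dummy_center = np.zeros((1, 368, 368, 1), dtype=np.float32)
    person, pose = sess.run([person_heatmap, pose_heatmaps],
                            feed_dict={image_in: dummy_image,
                                       center_map_in: dummy_center})
    print(person.shape, pose.shape)  # both maps are spatially downsampled by 8 (46x46 here)

Both networks follow the same multi-stage refinement pattern visible in the code above: each stage concatenates the previous stage's heatmaps with the shared convolutional features (plus, in PoseNet, the pooled centre map) and refines them with a stack of 7x7 convolutions.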