├── utils ├── __init__.py ├── fisheye │ ├── __init__.py │ ├── mean3D.mat │ ├── pose_fisheye_fisheye.calibration_new.json │ ├── fisheye.calibration-bak.json │ ├── fisheye.calibration.json │ ├── FishEyeEquisolid.py │ └── FishEyeCalibrated.py ├── torch_closest_rot_mat.py ├── data_utils.py ├── pytorch_gaussian.py ├── one_euro_filter.py ├── captury_studio_camera.py ├── pose_visualization_utils.py ├── pytorch_gmm.py ├── rigid_transform_with_scale.py ├── pytorch_gmm_from_scipy.py ├── utils.py └── skeleton.py ├── networks ├── __init__.py ├── dataset │ ├── __init__.py │ ├── local_dataset.py │ └── global_dataset.py ├── models │ ├── __init__.py │ ├── BaseVAE.py │ └── SeqConvVAE.py ├── train_global_mo2cap2.sh ├── train_local_mo2cap2.sh ├── train_global.sh ├── train_local.sh ├── README.md ├── config.py ├── sample.py ├── get_latent.py ├── make_dataset.py ├── train.py ├── interpolant.py ├── train_local_global.py └── train_local.py ├── MakeDataForOptimization ├── utils │ ├── __init__.py │ ├── fisheye │ │ ├── __init__.py │ │ ├── mean3D.mat │ │ ├── pose_fisheye_fisheye.calibration_new.json │ │ ├── fisheye.calibration-bak.json │ │ ├── fisheye.calibration.json │ │ ├── FishEyeEquisolid.py │ │ └── FishEyeCalibrated.py │ ├── calibrations │ │ └── new_fisheye.json │ ├── pose_visualization_utils.py │ ├── rigid_transform_with_scale.py │ ├── utils.py │ └── skeleton.py ├── bvh_reader │ ├── npybvh │ │ ├── __init__.py │ │ ├── images │ │ │ ├── keyframe.png │ │ │ └── skeleton.png │ │ ├── LICENSE │ │ ├── readme.md │ │ └── bvh.py │ └── read_egocentric_joint_position.py ├── README.md ├── slam_reader.py └── process_test_data.py ├── .gitignore ├── README.md ├── optimize_whole_sequence.py └── calculate_errors.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /networks/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /networks/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/fisheye/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | networks/logs 4 | data 5 | AMASSDataConverter 
-------------------------------------------------------------------------------- /utils/fisheye/mean3D.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianwang-mpi/GlobalEgoMocap/HEAD/utils/fisheye/mean3D.mat -------------------------------------------------------------------------------- /utils/torch_closest_rot_mat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def square_root(mat): 4 | e,v = torch.symeig(mat) 5 | 6 | def closest_rot_mat(rot_mat): 7 | pass -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/mean3D.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianwang-mpi/GlobalEgoMocap/HEAD/MakeDataForOptimization/utils/fisheye/mean3D.mat -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/images/keyframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianwang-mpi/GlobalEgoMocap/HEAD/MakeDataForOptimization/bvh_reader/npybvh/images/keyframe.png -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/images/skeleton.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianwang-mpi/GlobalEgoMocap/HEAD/MakeDataForOptimization/bvh_reader/npybvh/images/skeleton.png -------------------------------------------------------------------------------- /utils/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def mpjpe_on_different_joints(pose_preds, pose_gt): 4 | distance = np.linalg.norm(pose_gt - pose_preds, axis=-1) 5 | distance_on_each_joints = np.mean(distance, axis=tuple(range(distance.ndim - 1))) 6 | return distance_on_each_joints -------------------------------------------------------------------------------- /networks/train_global_mo2cap2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train.py --log_dir cnn_global_mo2cap2_dataset --train_data_path /home/jianwang/ScanNet/static00/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --new_dataset False --with_mo2cap2_data True --fps 25 --network cnn -------------------------------------------------------------------------------- /networks/train_local_mo2cap2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train_local.py --log_dir cnn_local_mo2cap2_dataset --train_data_path /home/jianwang/ScanNet/static00/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --new_dataset False --with_mo2cap2_data True --fps 25 --network cnn -------------------------------------------------------------------------------- /networks/train_global.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train.py --log_dir cnn_global_full_dataset_latent_2048_len_10_kl_0.5 --train_data_path /home/jianwang/ScanNet/static00/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --new_dataset False --with_mo2cap2_data False --fps 25 --network cnn 
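Note on the two small utilities listed earlier in this dump, `utils/torch_closest_rot_mat.py` (left as a stub in the repository) and `utils/data_utils.py`: neither is documented further in the repo, so the sketches below are illustrations only and not the authors' code. The SVD projection is one standard way to obtain the closest rotation matrix (the helper name `closest_rotation_svd` is an assumption), and the second snippet simply exercises `mpjpe_on_different_joints` as defined above with assumed array shapes.

```python
import torch

def closest_rotation_svd(mat: torch.Tensor) -> torch.Tensor:
    # Project a 3x3 matrix onto SO(3): with mat = U diag(S) V^T, the nearest
    # rotation in Frobenius norm is U V^T, flipping the sign of the last
    # column if the result would otherwise be a reflection (det < 0).
    u, _, v = torch.svd(mat)
    d = torch.det(u @ v.t())
    correction = torch.diag(torch.tensor([1.0, 1.0, d.sign().item()], dtype=mat.dtype))
    return u @ correction @ v.t()
```

```python
import numpy as np
from utils.data_utils import mpjpe_on_different_joints

# Assumed shapes: (n_frames, n_joints, 3); the repository uses 15 joints.
pose_gt = np.random.randn(100, 15, 3)
pose_preds = pose_gt + 0.01 * np.random.randn(100, 15, 3)

per_joint_error = mpjpe_on_different_joints(pose_preds, pose_gt)  # shape (15,)
print(per_joint_error.mean())  # overall MPJPE across joints and frames
```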
-------------------------------------------------------------------------------- /networks/train_local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train_local.py --log_dir mlp_local_full_dataset_latent_2048_len_10_kl_0.5 --train_data_path /home/jianwang/ScanNet/static00/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --with_mo2cap2_data False --new_dataset False --fps 25 --network mlp -------------------------------------------------------------------------------- /MakeDataForOptimization/README.md: -------------------------------------------------------------------------------- 1 | ## Get Data for Optimization 2 | 3 | 1. run Mo2Cap2 or other pose estimation network to get heatmap 4 | and joint depth files 5 | 6 | 2. get gt pose from BVH file and save it in to the pickle file 7 | 8 | see directory ```bvh_reader``` 9 | 10 | 3. run OpenVSLAM on image sequence to get slam file 11 | ```frame_trajectory.txt``` 12 | 13 | 4. generate data for optimization 14 | 15 | see ```process_test_data.py``` -------------------------------------------------------------------------------- /utils/fisheye/pose_fisheye_fisheye.calibration_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "new", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 639.074101, 0], 8 | [0, 500, 511.081780, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-4.083907e+02, 0.000000e+00, 1.679882e-03, -3.677087e-06, 7.461604e-09 ], 14 | "polynomialW2C": [492.969845, 193.289959, -28.612327, 51.744505, -2.120082, 13.644155, 1.512262, 15 | -18.789714, 18.962317, 14.989157, -12.692345, -5.804379, 3.508978, 1.511979 ], 16 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 17 | "extrinsic": [ 18 | [1, 0, 0, 0], 19 | [0, 1, 0, 0], 20 | [0, 0, 1, 0], 21 | [0, 0, 0, 1] 22 | ], 23 | "radial":0 24 | } 25 | 26 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/calibrations/new_fisheye.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "new", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 639.074101, 0], 8 | [0, 500, 511.081780, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-4.083907e+02, 0.000000e+00, 1.679882e-03, -3.677087e-06, 7.461604e-09 ], 14 | "polynomialW2C": [492.969845, 193.289959, -28.612327, 51.744505, -2.120082, 13.644155, 1.512262, 15 | -18.789714, 18.962317, 14.989157, -12.692345, -5.804379, 3.508978, 1.511979 ], 16 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 17 | "extrinsic": [ 18 | [1, 0, 0, 0], 19 | [0, 1, 0, 0], 20 | [0, 0, 1, 0], 21 | [0, 0, 0, 1] 22 | ], 23 | "radial":0 24 | } 25 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/pose_fisheye_fisheye.calibration_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "new", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 639.074101, 0], 8 | [0, 500, 511.081780, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | 
"imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-4.083907e+02, 0.000000e+00, 1.679882e-03, -3.677087e-06, 7.461604e-09 ], 14 | "polynomialW2C": [492.969845, 193.289959, -28.612327, 51.744505, -2.120082, 13.644155, 1.512262, 15 | -18.789714, 18.962317, 14.989157, -12.692345, -5.804379, 3.508978, 1.511979 ], 16 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 17 | "extrinsic": [ 18 | [1, 0, 0, 0], 19 | [0, 1, 0, 0], 20 | [0, 0, 1, 0], 21 | [0, 0, 0, 1] 22 | ], 23 | "radial":0 24 | } 25 | 26 | -------------------------------------------------------------------------------- /networks/models/BaseVAE.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from abc import abstractmethod 3 | import torch 4 | 5 | 6 | class BaseVAE(nn.Module): 7 | 8 | def __init__(self) -> None: 9 | super(BaseVAE, self).__init__() 10 | 11 | def encode(self, input: torch.Tensor): 12 | raise NotImplementedError 13 | 14 | def decode(self, input: torch.Tensor): 15 | raise NotImplementedError 16 | 17 | def sample(self, batch_size, current_device, **kwargs) -> torch.Tensor: 18 | raise RuntimeWarning() 19 | 20 | def generate(self, x: torch.Tensor, **kwargs) -> torch.Tensor: 21 | raise NotImplementedError 22 | 23 | @abstractmethod 24 | def forward(self, *inputs: torch.Tensor) -> torch.Tensor: 25 | pass 26 | 27 | @abstractmethod 28 | def loss_function(self, *inputs, **kwargs) -> torch.Tensor: 29 | pass 30 | -------------------------------------------------------------------------------- /utils/pytorch_gaussian.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from scipy.ndimage import gaussian_filter1d 3 | import numpy as np 4 | import torch.nn.functional as F 5 | 6 | 7 | 8 | def gaussian(window_size, sigma): 9 | def gauss_fcn(x): 10 | return -(x - window_size // 2)**2 / float(2 * sigma**2) 11 | gauss = torch.stack( 12 | [torch.exp(torch.tensor(gauss_fcn(x))) for x in range(window_size)]) 13 | return gauss / gauss.sum() 14 | 15 | if __name__ == '__main__': 16 | pose_random = np.random.randn() 17 | smoothed_skeleton_seq_np = gaussian_filter1d(pose_random, sigma=1) 18 | print(smoothed_skeleton_seq_np) 19 | 20 | 21 | pose_random_torch = torch.from_numpy(pose_random).unsqueeze_(0).unsqueeze_(0).float() 22 | kernel = gaussian(window_size=5, sigma=1) 23 | kernel = kernel.unsqueeze_(0).unsqueeze_(0).float() 24 | 25 | smoothed_skeleton_seq_torch = F.conv1d(pose_random_torch, kernel) 26 | print(smoothed_skeleton_seq_torch) -------------------------------------------------------------------------------- /utils/fisheye/fisheye.calibration-bak.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "egosyn", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 6.398405647786816e+02, 0], 8 | [0, 500, 512.11, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-241.024, 0.000000000000000E+00, 0.001746, -0.000004, 14 | 6.47811e-09], 15 | "polynomialW2C": [4.785893205484341E+02, 3.503715828980770E+02, 7.900065565120241E+01, 6.228794005673283E+01, 16 | 3.264466851189552E+01, 1.568380500967838E+01, 7.766879336977007E+00, 2.190791369989537E+00, 17 | -1.084229689289942E-01, -1.903842667463734E-01, -2.776267870029922E-02], 18 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 19 | 
"extrinsic": [ 20 | [1, 0, 0, 0], 21 | [0, 1, 0, 0], 22 | [0, 0, 1, 0], 23 | [0, 0, 0, 1] 24 | ], 25 | "radial":0 26 | } 27 | 28 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/fisheye.calibration-bak.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "egosyn", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 6.398405647786816e+02, 0], 8 | [0, 500, 512.11, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-241.024, 0.000000000000000E+00, 0.001746, -0.000004, 14 | 6.47811e-09], 15 | "polynomialW2C": [4.785893205484341E+02, 3.503715828980770E+02, 7.900065565120241E+01, 6.228794005673283E+01, 16 | 3.264466851189552E+01, 1.568380500967838E+01, 7.766879336977007E+00, 2.190791369989537E+00, 17 | -1.084229689289942E-01, -1.903842667463734E-01, -2.776267870029922E-02], 18 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 19 | "extrinsic": [ 20 | [1, 0, 0, 0], 21 | [0, 1, 0, 0], 22 | [0, 0, 1, 0], 23 | [0, 0, 0, 1] 24 | ], 25 | "radial":0 26 | } 27 | 28 | -------------------------------------------------------------------------------- /networks/README.md: -------------------------------------------------------------------------------- 1 | ## Motion VAE 2 | 3 | ### Download pre-processed dataset 4 | 5 | 1. Download the [pre-processed dataset](https://nextcloud.mpi-klsb.mpg.de/index.php/s/HWHGwZLi8xnLsRF). 6 | 2. Unzip the zip file: ```unzip EgocentricAMASS.zip``` 7 | 8 | ### train the motion VAE 9 | 10 | 1. global motion VAE 11 | 12 | ``` 13 | python train.py --log_dir cnn_global_full_dataset_latent_2048_len_10_kl_0.5 --train_data_path /path/to/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --new_dataset False --with_mo2cap2_data False --fps 25 --network cnn 14 | ``` 15 | 16 | 2. 
local motion VAE 17 | 18 | ``` 19 | python train_local.py --log_dir mlp_local_full_dataset_latent_2048_len_10_kl_0.5 --train_data_path /path/to/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --with_mo2cap2_data False --new_dataset False --fps 25 --network mlp 20 | ``` 21 | 22 | ### sample the motion from latent space 23 | 24 | see ```sample.py``` 25 | 26 | ### interpolant two different motions 27 | 28 | see ```interpolant.py``` 29 | -------------------------------------------------------------------------------- /utils/fisheye/fisheye.calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "egosyn", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 6.597087109684564E+02, 0], 8 | [0, 500, 5.300556618148025E+02, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-2.924126419694919E+02, 0.000000000000000E+00, 1.075613595858202E-03, 2.072664555244253E-07, 14 | 4.493499097653669E-10, -1.192028310212584E-15, -1.822337421183959E-17], 15 | "polynomialW2C": [4.785893205484341E+02, 3.503715828980770E+02, 7.900065565120241E+01, 6.228794005673283E+01, 16 | 3.264466851189552E+01, 1.568380500967838E+01, 7.766879336977007E+00, 2.190791369989537E+00, 17 | -1.084229689289942E-01, -1.903842667463734E-01, -2.776267870029922E-02], 18 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 19 | "extrinsic": [ 20 | [1, 0, 0, 0], 21 | [0, 1, 0, 0], 22 | [0, 0, 1, 0], 23 | [0, 0, 0, 1] 24 | ], 25 | "radial":0 26 | } 27 | 28 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/fisheye.calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "egosyn", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 6.597087109684564E+02, 0], 8 | [0, 500, 5.300556618148025E+02, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-2.924126419694919E+02, 0.000000000000000E+00, 1.075613595858202E-03, 2.072664555244253E-07, 14 | 4.493499097653669E-10, -1.192028310212584E-15, -1.822337421183959E-17], 15 | "polynomialW2C": [4.785893205484341E+02, 3.503715828980770E+02, 7.900065565120241E+01, 6.228794005673283E+01, 16 | 3.264466851189552E+01, 1.568380500967838E+01, 7.766879336977007E+00, 2.190791369989537E+00, 17 | -1.084229689289942E-01, -1.903842667463734E-01, -2.776267870029922E-02], 18 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 19 | "extrinsic": [ 20 | [1, 0, 0, 0], 21 | [0, 1, 0, 0], 22 | [0, 0, 1, 0], 23 | [0, 0, 0, 1] 24 | ], 25 | "radial":0 26 | } 27 | 28 | -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Mathias Parger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the 
Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/readme.md: -------------------------------------------------------------------------------- 1 | # npybvh 2 | Python 3 module for reading Biovision Hierarchy (BVH) files and convert keyframes to poses. 3 | 4 | # Features 5 | - Read Biovision Hierarchy files 6 | - Extract skeleton information (joint hierarchy) 7 | - Calculate full pose for all keyframes in numpy format 8 | - Display skeleton 9 | - Display pose at any keyframe 10 | - Display full animation 11 | 12 | ![Skeleton](images/skeleton.png) 13 | ![Pose animation](images/keyframe.png) 14 | 15 | 16 | # How to use 17 | ```python 18 | anim = Bvh() 19 | anim.load('example.bvh') 20 | # get position at frame 11 21 | positions, rotations = anim.frame_pos(11) 22 | 23 | # get name of joints in order of appearance in array 24 | joint_names = anim.joint_names() 25 | 26 | # plot frame 11 27 | anim.plot_frame(11) 28 | ``` 29 | 30 | # Acknowledgements 31 | Thanks to Carnegie Mellon University for their [free motion capture database](http://mocap.cs.cmu.edu/) which is also available in [bvh format](https://sites.google.com/a/cgspeed.com/cgspeed/motion-capture/cmu-bvh-conversion). 32 | 33 | # Contribute 34 | If you find bugs or miss features, please contribute! (or use issues tab :-)) 35 | -------------------------------------------------------------------------------- /utils/one_euro_filter.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def smoothing_factor(t_e, cutoff): 5 | r = 2 * math.pi * cutoff * t_e 6 | return r / (r + 1) 7 | 8 | 9 | def exponential_smoothing(a, x, x_prev): 10 | return a * x + (1 - a) * x_prev 11 | 12 | 13 | class OneEuroFilter: 14 | def __init__(self, t0, x0, dx0=0.0, min_cutoff=1.0, beta=0.0, 15 | d_cutoff=1.0): 16 | """Initialize the one euro filter.""" 17 | # The parameters. 18 | self.min_cutoff = float(min_cutoff) 19 | self.beta = float(beta) 20 | self.d_cutoff = float(d_cutoff) 21 | # Previous values. 22 | self.x_prev = float(x0) 23 | self.dx_prev = float(dx0) 24 | self.t_prev = float(t0) 25 | 26 | def __call__(self, t, x): 27 | """Compute the filtered signal.""" 28 | t_e = t - self.t_prev 29 | 30 | # The filtered derivative of the signal. 31 | a_d = smoothing_factor(t_e, self.d_cutoff) 32 | dx = (x - self.x_prev) / t_e 33 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev) 34 | 35 | # The filtered signal. 36 | cutoff = self.min_cutoff + self.beta * abs(dx_hat) 37 | a = smoothing_factor(t_e, cutoff) 38 | x_hat = exponential_smoothing(a, x, self.x_prev) 39 | 40 | # Memorize the previous values. 
41 | self.x_prev = x_hat 42 | self.dx_prev = dx_hat 43 | self.t_prev = t 44 | 45 | return x_hat -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/read_egocentric_joint_position.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | from utils.skeleton import Skeleton 3 | from npybvh.bvh import Bvh 4 | import numpy as np 5 | from tqdm import tqdm 6 | import pickle 7 | 8 | skeleton_model = Skeleton(None) 9 | anim = Bvh() 10 | egocentric_joints = [6, 15, 16, 17, 10, 11, 12, 23, 24, 25, 26, 19, 20, 21, 22] 11 | 12 | 13 | def parse_file(bvh_file_path, output_file_path, start_frame, input_frame_rate, output_frame_rate): 14 | anim.parse_file(bvh_file_path) 15 | gt_pose_seq = [] 16 | print(anim.frames) 17 | print(anim.joint_names()) 18 | step = round(input_frame_rate / output_frame_rate) 19 | for frame in tqdm(range(start_frame, anim.frames, step)): 20 | positions, rotations = anim.frame_pose(frame) 21 | 22 | positions = positions[egocentric_joints] 23 | positions = positions / 1000 24 | gt_pose_seq.append(positions) 25 | 26 | skeleton = skeleton_model.joints_2_mesh(positions) 27 | 28 | open3d.visualization.draw_geometries([skeleton]) 29 | gt_pose_seq = np.asarray(gt_pose_seq) 30 | # skeleton_list = skeleton_model.joint_list_2_mesh_list(gt_pose_seq) 31 | # open3d.visualization.draw_geometries(skeleton_list) 32 | with open(output_file_path, 'wb') as f: 33 | pickle.dump(gt_pose_seq, f) 34 | 35 | 36 | if __name__ == '__main__': 37 | parse_file(r'\\winfs-inf\HPS\Mo2Cap2Plus1\static00\CapturyData\GoProResults\captury\unknown.bvh', 38 | 'data/wild/wild.pkl', start_frame=0, input_frame_rate=50, output_frame_rate=25) -------------------------------------------------------------------------------- /utils/captury_studio_camera.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class CapturyCamera: 5 | def __init__(self, camera_path, camera_number): 6 | camera_data = self.load_camera_model(camera_path, camera_number) 7 | self.intrinsic = camera_data['intrinsic'] 8 | self.extrinsic = camera_data['extrinsic'] 9 | self.distortion = camera_data['distortion'] 10 | 11 | def get_camera_model(self): 12 | return self.intrinsic, self.extrinsic, self.distortion 13 | 14 | def load_camera_model(self, camera_path, camera_number): 15 | with open(camera_path) as f: 16 | lines = f.readlines() 17 | start_line_number = -1 18 | for i, line in enumerate(lines): 19 | if 'camera {}'.format(camera_number) in line: 20 | start_line_number = i 21 | break 22 | if start_line_number == -1: 23 | print('camera not found') 24 | camera_lines = lines[start_line_number: start_line_number + 27] 25 | distortion_line = camera_lines[11] 26 | extrinsic_lines = camera_lines[73 - 56: 76 - 56] 27 | intrinsic_lines = camera_lines[77 - 56: 80 - 56] 28 | distortions = distortion_line.split()[1:] 29 | distortions = np.asarray(distortions).astype(np.float) 30 | # print(distortions) 31 | extrinsic_lines = [line.split()[1:] for line in extrinsic_lines] 32 | intrinsic_lines = [line.split()[1:] for line in intrinsic_lines] 33 | 34 | extrinsic = np.asarray(extrinsic_lines).astype(np.float) 35 | intrinsic = np.asarray(intrinsic_lines).astype(np.float) 36 | 37 | return {'intrinsic': intrinsic, 38 | 'extrinsic': extrinsic, 39 | 'distortion': distortions} 40 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Egocentric Pose Optimizer 2 | 3 | The official implementation of the paper: 4 | 5 | ### Estimating Egocentric 3D Human Pose in Global Space. 6 | 7 | [[project page]](https://vcai.mpi-inf.mpg.de/projects/globalegomocap/) 8 | 9 | If you find this repository useful, please cite: 10 | 11 | ``` 12 | @InProceedings{Wang_2021_ICCV, 13 | author = {Wang, Jian and Liu, Lingjie and Xu, Weipeng and Sarkar, Kripasindhu and Theobalt, Christian}, 14 | title = {Estimating Egocentric 3D Human Pose in Global Space}, 15 | booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, 16 | month = {October}, 17 | year = {2021}, 18 | pages = {11500-11509} 19 | } 20 | ``` 21 | 22 | ### Optimize the motion sequences in the proposed dataset 23 | 24 | 1. Install PyTorch 1.4+ with CUDA support. 25 | 2. Run ```mkdir networks/logs``` and download the trained VAE model into the directory ```networks/logs``` from [here](https://nextcloud.mpi-klsb.mpg.de/index.php/s/ibBB7TbEsWQrMJa). 26 | 3. Run ```mkdir data``` and download the processed test sequences into the directory ```data``` from [here](https://nextcloud.mpi-klsb.mpg.de/index.php/s/kLNeAdbJzmSYKsZ). 27 | 4. Run the test on the sequences: 28 | ``` 29 | python optimize_whole_sequence.py --data_path data/jian3 30 | python optimize_whole_sequence.py --data_path data/studio-jian1 31 | python optimize_whole_sequence.py --data_path data/studio-jian2 32 | python optimize_whole_sequence.py --data_path data/studio-lingjie1 33 | python optimize_whole_sequence.py --data_path data/studio-lingjie2 34 | ``` 35 | 36 | ### Train the motion VAE 37 | 38 | If you want to train the motion VAE, see the directory ```networks```. 39 | 40 | ### Prepare the data for optimization 41 | If you want to run on your own dataset, 42 | you first need to preprocess the data following the scripts in the directory ```MakeDataForOptimization```. 43 | Basically, you need to prepare the following data: 44 | 1. Predicted human body heatmaps in egocentric view. 45 | 2. Predicted human body joint depths in egocentric view. 46 | 3. Human body ground truth (for calculating MPJPEs). 47 | 4. Camera pose sequence from the OpenVSLAM method. 48 | 49 | All of these data are combined into a single pickle file for each sequence (a hypothetical layout is sketched below).
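For orientation only, here is a purely hypothetical sketch of how such a per-sequence pickle could be assembled. The dictionary keys, file paths, and array shapes below are assumptions made for illustration; they are not the format actually expected by `optimize_whole_sequence.py` (see `MakeDataForOptimization/process_test_data.py` for the real packing).

```python
import pickle
import numpy as np

# Hypothetical per-sequence container: one entry per input listed above.
sequence_data = {
    'heatmaps': np.zeros((100, 15, 64, 64), dtype=np.float32),   # predicted 2D heatmaps
    'joint_depths': np.zeros((100, 15), dtype=np.float32),       # predicted joint depths
    'gt_pose': np.zeros((100, 15, 3), dtype=np.float32),         # ground-truth 3D joints
    'camera_poses': np.zeros((100, 4, 4), dtype=np.float32),     # SLAM camera pose matrices
}

with open('data/my_sequence/test_data.pkl', 'wb') as f:  # path is an example
    pickle.dump(sequence_data, f)
```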
50 | 51 | -------------------------------------------------------------------------------- /networks/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | parser = argparse.ArgumentParser(description='Estimate the ego-motion from single images') 4 | 5 | parser.add_argument('--train_data_path', default='/home/jianwang/ScanNet/static00/EgocentricAMASSPytorch/relative_global_pose_20_25.h5', 6 | type=str, help='train data dir') 7 | # 8 | parser.add_argument('--test_data_path', default='/home/jianwang/ScanNet/static00/EgocentricAMASSPytorch/relative_global_pose_20_25.h5', 9 | type=str, help='test data dir') 10 | 11 | parser.add_argument('--network', type=str, default='cnn', required=False, help='network type') 12 | 13 | parser.add_argument('--attention', type=str, default=None, required=False, help='attention type') 14 | 15 | parser.add_argument('--latent_dim', type=int, required=True, help='vae latent dimension') 16 | 17 | parser.add_argument('--with_mo2cap2_data', type=lambda x: (str(x).lower() == 'true'), required=True, default=False, help='use mo2cap2 dataset') 18 | 19 | parser.add_argument('--new_dataset', type=lambda x: (str(x).lower() == 'true'), required=True, default=True, help='use new dataset') 20 | 21 | parser.add_argument('--data_balance', type=lambda x: (str(x).lower() == 'true'), required=False, default=False, help='balance walking data') 22 | 23 | parser.add_argument('--slide_window_step', default=1, type=int, required=False, help='size of sample window') 24 | 25 | parser.add_argument('--seq_length', type=int, required=True, help='length of sequence') 26 | 27 | parser.add_argument('--fps', type=int, required=True, default=25, help='fps') 28 | 29 | parser.add_argument('--kl_weight', default=0.25, type=float, required=True, help='kl weight') 30 | 31 | parser.add_argument('--epoch', default=20, type=int, help='number of total epochs to run') 32 | 33 | parser.add_argument('--batch_size', default=64, type=int, help='batch size on every gpu') 34 | 35 | parser.add_argument('--num_workers', default=8, type=int, help='number of workers loading data') 36 | 37 | parser.add_argument('--learning_rate', default=1e-4, type=float, help='initial learning rate') 38 | 39 | parser.add_argument('--weight_decay', default=0, type=float, help='weight decay (default: 1e-4)') 40 | 41 | parser.add_argument('--log_dir', default=None, type=str, help='logging directory') 42 | 43 | 44 | 45 | parser.add_argument('--log_prefix', default=None, type=str, help='logging prefix') 46 | 47 | parser.add_argument('--log_step', default=100, type=int, help='logging step') 48 | 49 | parser.add_argument('--save_step', default=2000, type=int, help='saving step') 50 | 51 | args = parser.parse_args() -------------------------------------------------------------------------------- /utils/pose_visualization_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import open3d 3 | import numpy as np 4 | from scipy.spatial.transform import Rotation 5 | 6 | def get_sphere(position, radius=1.0, color=(0.1, 0.1, 0.7)): 7 | mesh_sphere: open3d.geometry.TriangleMesh = open3d.geometry.TriangleMesh.create_sphere(radius=radius) 8 | mesh_sphere.paint_uniform_color(color) 9 | 10 | # translate to position 11 | mesh_sphere = mesh_sphere.translate(position, relative=False) 12 | return mesh_sphere 13 | 14 | def rotation_matrix_from_vectors(vec1, vec2): 15 | """ Find the rotation matrix that aligns vec1 to vec2 16 | :param vec1: A 3d 
"source" vector 17 | :param vec2: A 3d "destination" vector 18 | :return mat: A transform matrix (3x3) which when applied to vec1, aligns it with vec2. 19 | """ 20 | a, b = (vec1 / np.linalg.norm(vec1)).reshape(3), (vec2 / np.linalg.norm(vec2)).reshape(3) 21 | v = np.cross(a, b) 22 | c = np.dot(a, b) 23 | s = np.linalg.norm(v) 24 | kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]]) 25 | rotation_matrix = np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s ** 2)) 26 | return rotation_matrix 27 | 28 | def get_cylinder(start_point, end_point, radius=0.3, color=(0.1, 0.9, 0.1)): 29 | center = (start_point + end_point) / 2 30 | height = np.linalg.norm(start_point - end_point) 31 | mesh_cylinder: open3d.geometry.TriangleMesh = open3d.geometry.TriangleMesh.create_cylinder(radius=radius, height=height) 32 | mesh_cylinder.paint_uniform_color(color) 33 | 34 | # translate and rotate to position 35 | # rotate vector 36 | rot_vec = end_point - start_point 37 | rot_vec = rot_vec / np.linalg.norm(rot_vec) 38 | rot_0 = np.array([0, 0, 1]) 39 | rot_mat = rotation_matrix_from_vectors(rot_0, rot_vec) 40 | # if open3d.__version__ >= '0.9.0.0': 41 | # rotation_param = rot_mat 42 | # else: 43 | # rotation_param = Rotation.from_matrix(rot_mat).as_euler('xyz') 44 | rotation_param = rot_mat 45 | mesh_cylinder = mesh_cylinder.rotate(rotation_param) 46 | mesh_cylinder = mesh_cylinder.translate(center, relative=False) 47 | return mesh_cylinder 48 | 49 | if __name__ == '__main__': 50 | point1 = np.array([-1, 11, 8]) 51 | point2 = np.array([12, -1, 5]) 52 | sphere1 = get_sphere(position=point1, radius=0.1) 53 | sphere2 = get_sphere(position=point2, radius=0.1) 54 | cylinder = get_cylinder(start_point=point1, end_point=point2, radius=0.02) 55 | 56 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5) 57 | 58 | open3d.visualization.draw_geometries( 59 | [sphere1, sphere2, cylinder, mesh_frame]) -------------------------------------------------------------------------------- /networks/sample.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import sys 3 | sys.path.append('..') 4 | import torch 5 | from models.SeqConvVAE import ConvVAE 6 | import pickle 7 | from utils.skeleton import Skeleton 8 | import os 9 | from tqdm import tqdm 10 | 11 | class Sample: 12 | def __init__(self, network_path, out_path, seq_length=20, latent_dim=2048): 13 | self.seq_length = seq_length 14 | self.out_path = out_path 15 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=latent_dim, seq_len=self.seq_length) 16 | state_dict = torch.load(network_path)['state_dict'] 17 | self.network.load_state_dict(state_dict) 18 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 19 | self.network = self.network.to(self.device) 20 | 21 | self.skeleton_model = Skeleton(None) 22 | 23 | def save_sample(self, sample, out_path): 24 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 25 | for i, skeleton in enumerate(skeleton_list): 26 | open3d.io.write_triangle_mesh(os.path.join(out_path, '{}.ply'.format(i)), skeleton) 27 | 28 | def show_example(self, sample): 29 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 30 | open3d.visualization.draw_geometries(skeleton_list) 31 | 32 | def sample(self, sample_num=10, visualization=False): 33 | sampled_result = self.network.sample(sample_num, current_device=self.device) 34 | sampled_result = sampled_result.cpu().detach_().numpy() 35 | 
sampled_result = sampled_result.reshape((sample_num, self.seq_length, 15, 3)) 36 | for i, sample in tqdm(enumerate(sampled_result)): 37 | print('sample: {}'.format(i)) 38 | if visualization is True: 39 | self.show_example(sample) 40 | else: 41 | out_path = os.path.join(self.out_path, 'sample_{}'.format(i)) 42 | if not os.path.isdir(out_path): 43 | os.mkdir(out_path) 44 | self.show_example(sample) 45 | self.save_sample(sample, out_path) 46 | 47 | if __name__ == '__main__': 48 | 49 | # sampler = Sample(network_path='logs/test/checkpoints/37.pth.tar', 50 | # out_path='out/sample') 51 | sampler = Sample(network_path='logs/only_local_full_dataset_latent_2048_len_10_kl_0.5_2/checkpoints/19.pth.tar', 52 | out_path='out/sample/slide_window_latent_2048_len_5_windows_size_5_kl_0.5', seq_length=10, 53 | latent_dim=2048) 54 | sampler.sample(sample_num=12, visualization=True) 55 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/pose_visualization_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import open3d 3 | import numpy as np 4 | from scipy.spatial.transform import Rotation 5 | 6 | def get_sphere(position, radius=1.0, color=(0.1, 0.1, 0.7)): 7 | mesh_sphere: open3d.geometry.TriangleMesh = open3d.geometry.TriangleMesh.create_sphere(radius=radius) 8 | mesh_sphere.paint_uniform_color(color) 9 | 10 | # translate to position 11 | mesh_sphere = mesh_sphere.translate(position, relative=False) 12 | return mesh_sphere 13 | 14 | def rotation_matrix_from_vectors(vec1, vec2): 15 | """ Find the rotation matrix that aligns vec1 to vec2 16 | :param vec1: A 3d "source" vector 17 | :param vec2: A 3d "destination" vector 18 | :return mat: A transform matrix (3x3) which when applied to vec1, aligns it with vec2. 
19 | """ 20 | a, b = (vec1 / np.linalg.norm(vec1)).reshape(3), (vec2 / np.linalg.norm(vec2)).reshape(3) 21 | v = np.cross(a, b) 22 | c = np.dot(a, b) 23 | s = np.linalg.norm(v) 24 | kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]]) 25 | rotation_matrix = np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s ** 2)) 26 | return rotation_matrix 27 | 28 | def get_cylinder(start_point, end_point, radius=0.3, color=(0.1, 0.9, 0.1)): 29 | center = (start_point + end_point) / 2 30 | height = np.linalg.norm(start_point - end_point) 31 | mesh_cylinder: open3d.geometry.TriangleMesh = open3d.geometry.TriangleMesh.create_cylinder(radius=radius, height=height) 32 | mesh_cylinder.paint_uniform_color(color) 33 | 34 | # translate and rotate to position 35 | # rotate vector 36 | rot_vec = end_point - start_point 37 | rot_vec = rot_vec / np.linalg.norm(rot_vec) 38 | rot_0 = np.array([0, 0, 1]) 39 | rot_mat = rotation_matrix_from_vectors(rot_0, rot_vec) 40 | # if open3d.__version__ >= '0.9.0.0': 41 | # rotation_param = rot_mat 42 | # else: 43 | # rotation_param = Rotation.from_matrix(rot_mat).as_euler('xyz') 44 | rotation_param = rot_mat 45 | mesh_cylinder = mesh_cylinder.rotate(rotation_param) 46 | mesh_cylinder = mesh_cylinder.translate(center, relative=False) 47 | return mesh_cylinder 48 | 49 | if __name__ == '__main__': 50 | point1 = np.array([-1, 11, 8]) 51 | point2 = np.array([12, -1, 5]) 52 | sphere1 = get_sphere(position=point1, radius=0.1) 53 | sphere2 = get_sphere(position=point2, radius=0.1) 54 | cylinder = get_cylinder(start_point=point1, end_point=point2, radius=0.02) 55 | 56 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5) 57 | 58 | open3d.visualization.draw_geometries( 59 | [sphere1, sphere2, cylinder, mesh_frame]) -------------------------------------------------------------------------------- /utils/fisheye/FishEyeEquisolid.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | 5 | 6 | class FishEyeCameraEquisolid: 7 | def __init__(self, focal_length, sensor_size, img_size, use_gpu=False): 8 | """ 9 | @param focal_length: focal length of camera in mm 10 | @param sensor_size: sensor size of camera in mm 11 | @param img_size: image size of w, h 12 | @param use_gpu: whether use the gpu to accelerate the calculation 13 | """ 14 | self.sensor_size = sensor_size 15 | self.img_size = np.asarray(img_size) 16 | self.use_gpu = use_gpu 17 | # calculate the focal length in pixel 18 | self.focal_length = focal_length / np.max(sensor_size) * np.max(img_size) 19 | 20 | # calculate the image center 21 | self.img_center = self.img_size / 2 + 1e-10 22 | # get max distance from image center 23 | self.max_distance = self.focal_length * np.sqrt(2) 24 | 25 | if self.use_gpu: 26 | gpu_ok = torch.cuda.is_available() 27 | if gpu_ok is False: 28 | raise Exception("GPU is not available!") 29 | 30 | def camera2world(self, point: np.ndarray, depth: np.ndarray): 31 | """ 32 | @param point: 2d point in shape: (n, 2) 33 | @param depth: depth of every points 34 | @return: 3d position of every point 35 | """ 36 | # get the distance of point to center 37 | depth = depth.astype(np.float) 38 | point_centered = point.astype(np.float) - self.img_center 39 | x = point_centered[:, 0] 40 | y = point_centered[:, 1] 41 | distance_from_center = np.sqrt(np.square(x) + np.square(y)) 42 | distance_from_center[distance_from_center > self.max_distance - 30] = self.max_distance 43 | 44 | theta = 2 * 
np.arcsin(distance_from_center / (2 * self.focal_length)) 45 | Z = distance_from_center / np.tan(theta) 46 | 47 | # square_sin_theta_div_2 = np.square(distance_from_center / (2 * self.focal_length)) 48 | # tan_theta_div_1 = np.sqrt(1 / (4 * square_sin_theta_div_2 * (1 - square_sin_theta_div_2)) - 1) 49 | # Z = distance_from_center * tan_theta_div_1 50 | point_3d = np.array([x, y, Z]) 51 | norm = np.linalg.norm(point_3d, axis=0) 52 | point_3d = point_3d / norm * depth 53 | return point_3d.transpose() 54 | 55 | def camera2world_pytorch(self, point: torch.Tensor(), depth: torch.Tensor): 56 | pass 57 | 58 | 59 | if __name__ == '__main__': 60 | camera = FishEyeCameraEquisolid(focal_length=9, sensor_size=32, img_size=(1280, 1024)) 61 | point = np.array([[660, 20], [660, 20], ]) 62 | depth = np.array([10, 10]) 63 | point3d = camera.camera2world(point, depth) 64 | print(point3d) 65 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/rigid_transform_with_scale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.linalg 3 | import random 4 | 5 | # Relevant links: 6 | # - http://stackoverflow.com/a/32244818/263061 (solution with scale) 7 | # - "Least-Squares Rigid Motion Using SVD" (no scale but easy proofs and explains how weights could be added) 8 | 9 | 10 | # Rigidly (+scale) aligns two point clouds with know point-to-point correspondences 11 | # with least-squares error. 12 | # Returns (scale factor c, rotation matrix R, translation vector t) such that 13 | # Q = P*cR + t 14 | # if they align perfectly, or such that 15 | # SUM over point i ( | P_i*cR + t - Q_i |^2 ) 16 | # is minimised if they don't align perfectly. 17 | 18 | def umeyama_ransac(P, Q, epsilon=0.4, n_iters=100): 19 | assert P.shape == Q.shape 20 | inliner_set = [] 21 | point_length = P.shape[0] 22 | for i in range(n_iters): 23 | sampled_points = random.sample(list(range(point_length)), 4) 24 | sampled_P = P[sampled_points] 25 | sampled_Q = Q[sampled_points] 26 | c, R, t = umeyama(sampled_P, sampled_Q) 27 | 28 | projected_P = P @ R * c + t 29 | new_inliner_set = [] 30 | for j in range(point_length): 31 | if np.linalg.norm(projected_P[j] - Q[j], ord=2) < epsilon: 32 | new_inliner_set.append(j) 33 | if len(new_inliner_set) > len(inliner_set): 34 | inliner_set = new_inliner_set 35 | 36 | sampled_P = P[inliner_set] 37 | sampled_Q = Q[inliner_set] 38 | c, R, t = umeyama(sampled_P, sampled_Q) 39 | return c, R, t 40 | 41 | def umeyama(P, Q): 42 | assert P.shape == Q.shape 43 | n, dim = P.shape 44 | 45 | centeredP = P - P.mean(axis=0) 46 | centeredQ = Q - Q.mean(axis=0) 47 | 48 | C = np.dot(np.transpose(centeredP), centeredQ) / n 49 | 50 | V, S, W = np.linalg.svd(C) 51 | d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 52 | 53 | if d: 54 | S[-1] = -S[-1] 55 | V[:, -1] = -V[:, -1] 56 | 57 | R = np.dot(V, W) 58 | 59 | varP = np.var(P, axis=0).sum() 60 | c = 1/varP * np.sum(S) # scale factor 61 | 62 | t = Q.mean(axis=0) - P.mean(axis=0).dot(c*R) 63 | 64 | return c, R, t 65 | 66 | def umeyama_dim_2(P, Q): 67 | assert P.shape == Q.shape 68 | n, dim1 = P.shape 69 | 70 | centeredP = P 71 | centeredQ = Q 72 | 73 | C = np.dot(np.transpose(centeredP), centeredQ) / n 74 | 75 | V, S, W = np.linalg.svd(C) 76 | d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 77 | 78 | if d: 79 | S[-1] = -S[-1] 80 | V[:, -1] = -V[:, -1] 81 | 82 | R = np.dot(V, W) 83 | 84 | varP = np.var(P, axis=0).sum() 85 | c = 1/varP * np.sum(S) # scale 
factor 86 | 87 | t = Q.mean(axis=0) - P.mean(axis=0).dot(c*R) 88 | 89 | return c, R, t -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/FishEyeEquisolid.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | 5 | 6 | class FishEyeCameraEquisolid: 7 | def __init__(self, focal_length, sensor_size, img_size, use_gpu=False): 8 | """ 9 | @param focal_length: focal length of camera in mm 10 | @param sensor_size: sensor size of camera in mm 11 | @param img_size: image size of w, h 12 | @param use_gpu: whether use the gpu to accelerate the calculation 13 | """ 14 | self.sensor_size = sensor_size 15 | self.img_size = np.asarray(img_size) 16 | self.use_gpu = use_gpu 17 | # calculate the focal length in pixel 18 | self.focal_length = focal_length / np.max(sensor_size) * np.max(img_size) 19 | 20 | # calculate the image center 21 | self.img_center = self.img_size / 2 + 1e-10 22 | # get max distance from image center 23 | self.max_distance = self.focal_length * np.sqrt(2) 24 | 25 | if self.use_gpu: 26 | gpu_ok = torch.cuda.is_available() 27 | if gpu_ok is False: 28 | raise Exception("GPU is not available!") 29 | 30 | def camera2world(self, point: np.ndarray, depth: np.ndarray): 31 | """ 32 | @param point: 2d point in shape: (n, 2) 33 | @param depth: depth of every points 34 | @return: 3d position of every point 35 | """ 36 | # get the distance of point to center 37 | depth = depth.astype(np.float) 38 | point_centered = point.astype(np.float) - self.img_center 39 | x = point_centered[:, 0] 40 | y = point_centered[:, 1] 41 | distance_from_center = np.sqrt(np.square(x) + np.square(y)) 42 | distance_from_center[distance_from_center > self.max_distance - 30] = self.max_distance 43 | 44 | theta = 2 * np.arcsin(distance_from_center / (2 * self.focal_length)) 45 | Z = distance_from_center / np.tan(theta) 46 | 47 | # square_sin_theta_div_2 = np.square(distance_from_center / (2 * self.focal_length)) 48 | # tan_theta_div_1 = np.sqrt(1 / (4 * square_sin_theta_div_2 * (1 - square_sin_theta_div_2)) - 1) 49 | # Z = distance_from_center * tan_theta_div_1 50 | point_3d = np.array([x, y, Z]) 51 | norm = np.linalg.norm(point_3d, axis=0) 52 | point_3d = point_3d / norm * depth 53 | return point_3d.transpose() 54 | 55 | def camera2world_pytorch(self, point: torch.Tensor(), depth: torch.Tensor): 56 | pass 57 | 58 | 59 | if __name__ == '__main__': 60 | camera = FishEyeCameraEquisolid(focal_length=9, sensor_size=32, img_size=(1280, 1024)) 61 | point = np.array([[660, 20], [660, 20], ]) 62 | depth = np.array([10, 10]) 63 | point3d = camera.camera2world(point, depth) 64 | print(point3d) 65 | -------------------------------------------------------------------------------- /utils/pytorch_gmm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from math import pi 5 | from scipy.special import logsumexp 6 | from sklearn.mixture import GaussianMixture 7 | import pickle 8 | 9 | 10 | class GaussianMixturePytorch: 11 | """ 12 | Fits a mixture of k=1,..,K Gaussians to the input data (K is supplied via n_components). Input tensors are expected to be flat with dimensions (n: number of samples, d: number of features). 13 | The model then extends them to (n, 1, d). 
14 | The model parametrization (mu, sigma) is stored as (1, k, d), and probabilities are shaped (n, k, 1) if they relate to an individual sample, or (1, k, 1) if they assign membership probabilities to one of the mixture components. 15 | """ 16 | 17 | def __init__(self, scipy_model_path): 18 | 19 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 20 | 21 | self.mu, self.precisions_cholesky, self.covariances = \ 22 | self.load_from_scipy_model(scipy_model_path) 23 | 24 | def load_from_scipy_model(self, scipy_model_path): 25 | with open(scipy_model_path, 'rb') as f: 26 | gmm: GaussianMixture = pickle.load(f) 27 | means = torch.from_numpy(gmm.means_).float().to(self.device) 28 | precisions_cholesky = torch.from_numpy(gmm.precisions_cholesky_).float().to(self.device) 29 | covariances = torch.from_numpy(gmm.covariances_).float().to(self.device) 30 | return means, precisions_cholesky, covariances 31 | 32 | def _estimate_log_prob(self, X): 33 | return self._estimate_log_gaussian_prob( 34 | X, self.mu, self.precisions_cholesky) 35 | 36 | def score_samples(self, X): 37 | X = X.to(self.device) 38 | return torch.logsumexp(self._estimate_log_prob(X), dim=1) 39 | 40 | def _compute_log_det_cholesky(self, matrix_chol, n_features): 41 | """ 42 | Compute the log-det of the cholesky decomposition of matrices. 43 | """ 44 | 45 | n_components, _, _ = matrix_chol.shape 46 | log_det_chol = (torch.sum(torch.log(matrix_chol.view(n_components, -1)[:, ::n_features + 1]), 1)) 47 | 48 | return log_det_chol 49 | 50 | def _estimate_log_gaussian_prob(self, X, means, precisions_chol): 51 | n_samples, n_features = X.shape 52 | n_components, _ = means.shape 53 | # det(precision_chol) is half of det(precision) 54 | log_det = self._compute_log_det_cholesky( 55 | precisions_chol, n_features) 56 | 57 | log_prob = torch.empty((n_samples, n_components)).to(self.device) 58 | for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): 59 | y = torch.matmul(X, prec_chol) - torch.matmul(mu, prec_chol) 60 | log_prob[:, k] = torch.sum(torch.square(y), axis=1) 61 | 62 | return -.5 * (n_features * torch.log(torch.Tensor([2 * np.pi])).to(self.device) + log_prob) + log_det 63 | 64 | 65 | if __name__ == '__main__': 66 | gmm = GaussianMixturePytorch('../AMASSDataConverter/model_global_20_5.pkl') 67 | X = torch.randn(5, 225) 68 | res = gmm.score_samples(X) 69 | print(res) 70 | 71 | with open('../AMASSDataConverter/model_global_20_5.pkl', 'rb') as f: 72 | gmm_scipy: GaussianMixture = pickle.load(f) 73 | X = X.cpu().numpy() 74 | print(gmm_scipy.score_samples(X)) 75 | -------------------------------------------------------------------------------- /networks/get_latent.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import sys 3 | sys.path.append('..') 4 | import torch 5 | from SeqConvVAE import ConvVAE 6 | import pickle 7 | from utils.skeleton import Skeleton 8 | import os 9 | from tqdm import tqdm 10 | from dataset import AMASSDataset 11 | from dataset import Mo2Cap2Dataset 12 | from torch.utils.data import DataLoader 13 | import numpy as np 14 | 15 | class Sample: 16 | def __init__(self, network_path, out_path, seq_length=20, latent_dim=128): 17 | self.seq_length = seq_length 18 | self.out_path = out_path 19 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=latent_dim, seq_len=self.seq_length) 20 | state_dict = torch.load(network_path)['state_dict'] 21 | self.network.load_state_dict(state_dict) 22 | self.device = 
torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 23 | self.network = self.network.to(self.device) 24 | 25 | self.skeleton_model = Skeleton(None) 26 | 27 | # self.test_dataset = AMASSDataset(data_path='/home/wangjian/Develop/BodyPoseOptimization/AMASSDataConverter/pkls', 28 | # frame_num=self.seq_length, 29 | # is_train=False) 30 | 31 | self.test_dataset = Mo2Cap2Dataset( 32 | pkl_path='../test_data_weipeng_studio/data_start_1187_end_1287/test_data.pkl') 33 | 34 | self.test_dataloader = DataLoader(self.test_dataset, batch_size=4, shuffle=False, 35 | drop_last=False, num_workers=2) 36 | 37 | def save_sample(self, sample, out_path): 38 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 39 | for i, skeleton in enumerate(skeleton_list): 40 | open3d.io.write_triangle_mesh(os.path.join(out_path, '{}.ply'.format(i)), skeleton) 41 | 42 | def show_example(self, sample): 43 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 44 | open3d.visualization.draw_geometries(skeleton_list) 45 | 46 | def test(self, visualization=False): 47 | print('---------------------Start Eval-----------------------') 48 | self.network.eval() 49 | with torch.no_grad(): 50 | for i, relative_global_pose in tqdm(enumerate(self.test_dataloader)): 51 | relative_global_pose = relative_global_pose.to(self.device) 52 | relative_global_pose = relative_global_pose 53 | mu, std, z = self.network.get_latent_space(relative_global_pose) 54 | # print(mu) 55 | # print(std) 56 | # 57 | print('mu error is: {}'.format(torch.sum(torch.square(mu)))) 58 | print('std error is: {}'.format(torch.sum(torch.square(std - 1)))) 59 | print('---------------------------------------') 60 | 61 | 62 | pose_preds, pose_inputs, _, _ = self.network(relative_global_pose) 63 | mu, std, z = self.network.get_latent_space(pose_preds) 64 | print('vae refined mu error is: {}'.format(torch.sum(torch.square(mu)))) 65 | print('vae refined std error is: {}'.format(torch.sum(torch.square(std - 1)))) 66 | print('***********************************************') 67 | 68 | 69 | 70 | if __name__ == '__main__': 71 | 72 | # sampler = Sample(network_path='logs/test/checkpoints/37.pth.tar', 73 | # out_path='out/sample') 74 | sampler = Sample(network_path='logs/real_full_dataset_latent_2048_len_5_slide_window_step_1_kl_0.5/checkpoints/19.pth.tar', 75 | out_path='out/test', seq_length=5, latent_dim=2048) 76 | sampler.test(visualization=True) 77 | -------------------------------------------------------------------------------- /utils/rigid_transform_with_scale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.linalg 3 | import random 4 | import torch 5 | 6 | # Relevant links: 7 | # - http://stackoverflow.com/a/32244818/263061 (solution with scale) 8 | # - "Least-Squares Rigid Motion Using SVD" (no scale but easy proofs and explains how weights could be added) 9 | 10 | 11 | # Rigidly (+scale) aligns two point clouds with know point-to-point correspondences 12 | # with least-squares error. 13 | # Returns (scale factor c, rotation matrix R, translation vector t) such that 14 | # Q = P*cR + t 15 | # if they align perfectly, or such that 16 | # SUM over point i ( | P_i*cR + t - Q_i |^2 ) 17 | # is minimised if they don't align perfectly. 
18 | def umeyama(P, Q): 19 | assert P.shape == Q.shape 20 | n, dim = P.shape 21 | 22 | centeredP = P - P.mean(axis=0) 23 | centeredQ = Q - Q.mean(axis=0) 24 | 25 | C = np.dot(np.transpose(centeredP), centeredQ) / n 26 | 27 | 28 | 29 | V, S, W = np.linalg.svd(C) 30 | d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 31 | 32 | if d: 33 | S[-1] = -S[-1] 34 | V[:, -1] = -V[:, -1] 35 | 36 | R = np.dot(V, W) 37 | 38 | varP = np.var(P, axis=0).sum() 39 | c = 1/varP * np.sum(S) # scale factor 40 | 41 | t = Q.mean(axis=0) - P.mean(axis=0).dot(c*R) 42 | 43 | return c, R, t 44 | 45 | def umeyama_pytorch(P, Q): 46 | assert P.shape == Q.shape 47 | n, dim = P.shape 48 | 49 | centeredP = P - torch.mean(P, dim=0) 50 | centeredQ = Q - torch.mean(Q, dim=0) 51 | 52 | C = centeredP.T @ centeredQ / n 53 | 54 | V, S, W = torch.svd(C) 55 | W = W.T 56 | d = (torch.det(V) * torch.det(W)) < 0.0 57 | 58 | if d: 59 | S[-1] = -S[-1] 60 | V[:, -1] = -V[:, -1] 61 | 62 | R = V @ W 63 | 64 | 65 | varP = torch.sum(torch.var(P, dim=0, unbiased=False)) 66 | c = 1 / varP * torch.sum(S) # scale factor 67 | 68 | t = torch.mean(Q, dim=0) - torch.mean(P, dim=0).matmul(c * R) 69 | 70 | return c, R, t 71 | 72 | def umeyama_ransac(P, Q, epsilon=0.2, n_iters=80): 73 | assert P.shape == Q.shape 74 | inliner_set = [] 75 | point_length = P.shape[0] 76 | for i in range(n_iters): 77 | sampled_points = random.sample(list(range(point_length)), 4) 78 | sampled_P = P[sampled_points] 79 | sampled_Q = Q[sampled_points] 80 | c, R, t = umeyama(sampled_P, sampled_Q) 81 | 82 | projected_P = P @ R * c + t 83 | new_inliner_set = [] 84 | for j in range(point_length): 85 | if np.linalg.norm(projected_P[j] - Q[j], ord=2) < epsilon: 86 | new_inliner_set.append(j) 87 | if len(new_inliner_set) > len(inliner_set): 88 | inliner_set = new_inliner_set 89 | 90 | sampled_P = P[inliner_set] 91 | sampled_Q = Q[inliner_set] 92 | c, R, t = umeyama(sampled_P, sampled_Q) 93 | return c, R, t 94 | 95 | def umeyama_dim_2(P, Q): 96 | assert P.shape == Q.shape 97 | n, dim1 = P.shape 98 | 99 | centeredP = P 100 | centeredQ = Q 101 | 102 | C = np.dot(np.transpose(centeredP), centeredQ) / n 103 | 104 | V, S, W = np.linalg.svd(C) 105 | d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 106 | 107 | if d: 108 | S[-1] = -S[-1] 109 | V[:, -1] = -V[:, -1] 110 | 111 | R = np.dot(V, W) 112 | 113 | varP = np.var(P, axis=0).sum() 114 | c = 1/varP * np.sum(S) # scale factor 115 | 116 | t = Q.mean(axis=0) - P.mean(axis=0).dot(c*R) 117 | 118 | return c, R, t 119 | 120 | if __name__ == '__main__': 121 | a = np.random.normal(size=(15, 3)) 122 | b = np.random.normal(size=(15, 3)) 123 | 124 | result1 = umeyama(a.copy(), b.copy()) 125 | print(result1) 126 | result2 = umeyama_pytorch(torch.from_numpy(a), torch.from_numpy(b)) 127 | print(result2) -------------------------------------------------------------------------------- /utils/pytorch_gmm_from_scipy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from math import pi 5 | from scipy.special import logsumexp 6 | from sklearn.mixture import GaussianMixture 7 | import pickle 8 | 9 | 10 | class GaussianMixturePytorchFromScipy: 11 | """ 12 | Fits a mixture of k=1,..,K Gaussians to the input data (K is supplied via n_components). Input tensors are expected to be flat with dimensions (n: number of samples, d: number of features). 13 | The model then extends them to (n, 1, d). 
14 | The model parametrization (mu, sigma) is stored as (1, k, d), and probabilities are shaped (n, k, 1) if they relate to an individual sample, or (1, k, 1) if they assign membership probabilities to one of the mixture components. 15 | """ 16 | 17 | def __init__(self, scipy_model_path): 18 | 19 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 20 | 21 | self.mu, self.precisions_cholesky, self.covariances, self.covariance_type = \ 22 | self.load_from_scipy_model(scipy_model_path) 23 | 24 | 25 | def load_from_scipy_model(self, scipy_model_path): 26 | with open(scipy_model_path, 'rb') as f: 27 | gmm: GaussianMixture = pickle.load(f) 28 | means = torch.from_numpy(gmm.means_).float().to(self.device) 29 | precisions_cholesky = torch.from_numpy(gmm.precisions_cholesky_).float().to(self.device) 30 | covariances = torch.from_numpy(gmm.covariances_).float().to(self.device) 31 | cov_type = gmm.covariance_type 32 | return means, precisions_cholesky, covariances, cov_type 33 | 34 | def _estimate_log_prob(self, X): 35 | return self._estimate_log_gaussian_prob( 36 | X, self.mu, self.precisions_cholesky) 37 | 38 | def score_samples(self, X): 39 | X = X.to(self.device) 40 | return torch.logsumexp(self._estimate_log_prob(X), dim=1) 41 | 42 | def _compute_log_det_cholesky(self, matrix_chol, n_features): 43 | """ 44 | Compute the log-det of the cholesky decomposition of matrices. 45 | """ 46 | if self.covariance_type == 'full': 47 | n_components, _, _ = matrix_chol.shape 48 | log_det_chol = (torch.sum(torch.log(matrix_chol.view(n_components, -1)[:, ::n_features + 1]), 1)) 49 | elif self.covariance_type == 'diag': 50 | log_det_chol = (torch.sum(torch.log(matrix_chol), axis=1)) 51 | 52 | return log_det_chol 53 | 54 | def _estimate_log_gaussian_prob(self, X, means, precisions_chol): 55 | n_samples, n_features = X.shape 56 | n_components, _ = means.shape 57 | # det(precision_chol) is half of det(precision) 58 | log_det = self._compute_log_det_cholesky( 59 | precisions_chol, n_features) 60 | 61 | if self.covariance_type == 'full': 62 | log_prob = torch.empty((n_samples, n_components)).to(self.device) 63 | for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): 64 | y = torch.matmul(X, prec_chol) - torch.matmul(mu, prec_chol) 65 | log_prob[:, k] = torch.sum(torch.square(y), axis=1) 66 | elif self.covariance_type == 'diag': 67 | precisions = precisions_chol ** 2 68 | log_prob = (torch.sum((means ** 2 * precisions), 1) - 69 | 2. 
* torch.matmul(X, (means * precisions).T) + 70 | torch.matmul(X ** 2, precisions.T)) 71 | else: 72 | raise Exception('wrong covariance type!') 73 | 74 | return -.5 * (n_features * torch.log(torch.Tensor([2 * np.pi])).to(self.device) + log_prob) + log_det 75 | 76 | 77 | if __name__ == '__main__': 78 | gmm = GaussianMixturePytorchFromScipy('../AMASSDataConverter/model_global_ncomp_1_nframe_5_slidewindow_True_covtype_diag.pkl') 79 | X_pose = torch.randn(5, 5, 15, 3) 80 | X = X_pose.view(-1, 225) 81 | res = gmm.score_samples(X) 82 | conv = gmm.covariances.detach().cpu().numpy() 83 | print(res) 84 | 85 | with open('../AMASSDataConverter/model_global_ncomp_1_nframe_5_slidewindow_True_covtype_diag.pkl', 'rb') as f: 86 | gmm_scipy: GaussianMixture = pickle.load(f) 87 | X = X.cpu().numpy() 88 | print(gmm_scipy.score_samples(X)) 89 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import shutil 4 | import numpy as np 5 | from scipy.spatial.transform import Rotation 6 | 7 | def transformation_matrix_to_translation_and_rotation(mat): 8 | rotation_matrix = mat[:3, :3] 9 | translation = mat[:3, 3] 10 | 11 | # rotation matrix to rotation euler 12 | rotation_euler = Rotation.from_matrix(rotation_matrix).as_euler('xyz') 13 | return rotation_euler, translation 14 | 15 | 16 | def get_relative_transform(location1, rotation1, location2, rotation2): 17 | # input: location and rotation in blender coordinate 18 | # out: rotation, translation and transform matrix in OpenCV coordinate 19 | T_world2cv1, R_world2cv1, mat_world2cv1 = get_cv_rt_from_blender(location1, rotation1) 20 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_blender(location2, rotation2) 21 | 22 | mat_cv1_2world = np.linalg.inv(mat_world2cv1) 23 | 24 | mat_cv1_to_cv2 = mat_cv1_2world.dot(mat_world2cv2) 25 | # mat cv1 to cv2 is the coordinate transformation, we need to change it to the object transformation 26 | mat_cv2_to_cv1 = np.linalg.inv(mat_cv1_to_cv2) 27 | 28 | rotation, translation = transformation_matrix_to_translation_and_rotation(mat_cv2_to_cv1) 29 | return rotation, translation, mat_cv2_to_cv1 30 | 31 | 32 | def get_transform_relative_to_base_cv(base_location, base_rotation, location, rotation): 33 | # input: location and rotation in blender coordinate 34 | # out: rotation, translation and transform matrix in OpenCV coordinate 35 | T_world2cv_base, R_world2cv_base, mat_world2cv_base = get_cv_rt_from_cv(base_location, base_rotation) 36 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_cv(location, rotation) 37 | 38 | # mat_cv2world2 = np.linalg.inv(mat_world2cv2) 39 | # location_cv_homo = mat_cv2world2[:, 3] 40 | 41 | location_cv_homo = np.concatenate([location, np.ones(1)]) 42 | 43 | R_cv2_2_base = R_world2cv2.T.dot(R_world2cv_base) 44 | new_rotation_euler = Rotation.from_matrix(R_cv2_2_base).as_euler(seq='xyz') 45 | 46 | new_location_homo = mat_world2cv_base.dot(location_cv_homo) 47 | new_location = new_location_homo[:3] 48 | 49 | return new_location, new_rotation_euler 50 | 51 | def get_transform_relative_to_base_blender(base_location, base_rotation, location, rotation): 52 | T_world2cv_base, R_world2cv_base, mat_world2cv_base = get_cv_rt_from_blender(base_location, base_rotation) 53 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_blender(location, rotation) 54 | 55 | location_cv_homo = np.concatenate([location, 
np.ones(1)]) 56 | 57 | R_cv2_2_base = R_world2cv2.T.dot(R_world2cv_base) 58 | new_rotation_euler = Rotation.from_matrix(R_cv2_2_base).as_euler(seq='xyz') 59 | 60 | new_location_homo = mat_world2cv_base.dot(location_cv_homo) 61 | new_location = new_location_homo[:3] 62 | 63 | return new_location, new_rotation_euler 64 | 65 | # code modified from zaw lin 66 | def get_cv_rt_from_blender(location, rotation): 67 | # bcam stands for blender camera 68 | R_bcam2cv = np.array( 69 | [[1, 0, 0], 70 | [0, -1, 0], 71 | [0, 0, -1]]) 72 | 73 | # Transpose since the rotation is object rotation, 74 | # and we want coordinate rotation 75 | # R_world2bcam = cam.rotation_euler.to_matrix().transposed() 76 | # T_world2bcam = -1*R_world2bcam * location 77 | # 78 | # Use matrix_world instead to account for all constraints 79 | 80 | R_world2bcam = Rotation.from_euler('xyz', rotation, degrees=False).as_matrix().T 81 | 82 | # Convert camera location to translation vector used in coordinate changes 83 | # T_world2bcam = -1*R_world2bcam*cam.location 84 | # Use location from matrix_world to account for constraints: 85 | T_world2bcam = -1 * R_world2bcam.dot(location) 86 | 87 | # Build the coordinate transform matrix from world to computer vision camera 88 | R_world2cv = R_bcam2cv.dot(R_world2bcam) 89 | T_world2cv = R_bcam2cv.dot(T_world2bcam) 90 | 91 | # put into a 4x4 homogeneous matrix 92 | mat = np.array([ 93 | np.concatenate([R_world2cv[0], [T_world2cv[0]]]), 94 | np.concatenate([R_world2cv[1], [T_world2cv[1]]]), 95 | np.concatenate([R_world2cv[2], [T_world2cv[2]]]), 96 | [0, 0, 0, 1] 97 | ]) 98 | return T_world2cv, R_world2cv, mat 99 | 100 | # code modified from zaw lin 101 | def get_cv_rt_from_cv(location, rotation): 102 | 103 | # Transpose since the rotation is object rotation, 104 | # and we want coordinate rotation 105 | # R_world2bcam = cam.rotation_euler.to_matrix().transposed() 106 | # T_world2bcam = -1*R_world2bcam * location 107 | # 108 | # Use matrix_world instead to account for all constraints 109 | 110 | R_world2cv = Rotation.from_euler('xyz', rotation, degrees=False).as_matrix().T 111 | 112 | # Convert camera location to translation vector used in coordinate changes 113 | # T_world2bcam = -1*R_world2bcam*cam.location 114 | # Use location from matrix_world to account for constraints: 115 | T_world2cv = -1 * R_world2cv.dot(location) 116 | 117 | # put into a 4x4 homogeneous matrix 118 | mat = np.array([ 119 | np.concatenate([R_world2cv[0], [T_world2cv[0]]]), 120 | np.concatenate([R_world2cv[1], [T_world2cv[1]]]), 121 | np.concatenate([R_world2cv[2], [T_world2cv[2]]]), 122 | [0, 0, 0, 1] 123 | ]) 124 | return T_world2cv, R_world2cv, mat -------------------------------------------------------------------------------- /networks/make_dataset.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import torch 3 | from torch.utils.data.dataset import Dataset 4 | from torch.utils.data.dataloader import DataLoader 5 | import sys 6 | sys.path.append('..') 7 | from tqdm import tqdm 8 | import pickle 9 | import os 10 | from utils.utils import get_relative_global_pose, get_relative_global_pose_with_camera_matrix, trans_qrot_to_matrix 11 | import numpy as np 12 | import h5py 13 | 14 | 15 | class HDF5Store(object): 16 | """ 17 | Simple class to append values to an HDF5 file on disk (useful for building Keras datasets) 18 | 19 | Params: 20 | datapath: filepath of h5 file 21 | dataset: dataset name within the file 22 | shape: dataset shape (not counting main/batch axis) 23 | dtype: numpy dtype 24 |
25 | Usage: 26 | hdf5_store = HDF5Store('/tmp/hdf5_store.h5','X', shape=(20,20,3)) 27 | x = np.random.random(hdf5_store.shape) 28 | hdf5_store.append(x) 29 | hdf5_store.append(x) 30 | 31 | From https://gist.github.com/wassname/a0a75f133831eed1113d052c67cf8633 32 | """ 33 | 34 | def __init__(self, datapath, dataset_shape_dict: dict, dtype=np.float32): 35 | self.datapath = datapath 36 | self.dataset_shape_dict = dataset_shape_dict 37 | 38 | with h5py.File(self.datapath, mode='w') as h5f: 39 | for dataset_name in self.dataset_shape_dict.keys(): 40 | h5f.create_dataset( 41 | dataset_name, 42 | shape=(0,) + self.dataset_shape_dict[dataset_name], 43 | maxshape=(None,) + self.dataset_shape_dict[dataset_name], 44 | dtype=dtype) 45 | 46 | def append(self, values_batch_dict: dict): 47 | # print('append data!') 48 | with h5py.File(self.datapath, mode='a') as h5f: 49 | for dataset_name, values_batch in values_batch_dict.items(): 50 | batch_size = values_batch.shape[0] 51 | dset = h5f[dataset_name] 52 | dset_len = dset.shape[0] 53 | shape = dset.shape[1:] 54 | dset.resize((dset_len + batch_size,) + shape) 55 | for i in range(dset_len, dset_len + batch_size): 56 | dset[i] = [values_batch[i - dset_len]] 57 | h5f.flush() 58 | 59 | 60 | def main(source_dir, output_path, frame_num, fps, slide_window): 61 | pkl_path_list = os.listdir(source_dir) 62 | dataset_shape_dict = {"relative_global_pose": (frame_num, 15, 3), 63 | "local_pose": (frame_num, 15, 3), 64 | "camera_matrix": (frame_num, 4, 4)} 65 | hdf5_store = HDF5Store(output_path, dataset_shape_dict) 66 | for file_name in tqdm(pkl_path_list): 67 | file_path = os.path.join(source_dir, file_name) 68 | relative_global_pose_seq_list, local_pose_seq_list, camera_matrix_seq_list = \ 69 | get_relative_global_pose_list(file_path, frame_num, fps, slide_window) 70 | values_batch_dict = {"relative_global_pose": relative_global_pose_seq_list, 71 | "local_pose": local_pose_seq_list, 72 | "camera_matrix": camera_matrix_seq_list} 73 | hdf5_store.append(values_batch_dict) 74 | 75 | 76 | def interpolate(original_sequence, interpolate_time=5): 77 | result_list = [] 78 | for i in range(len(original_sequence) - 1): 79 | local_pose_i = original_sequence[i] 80 | local_pose_i_1 = original_sequence[i + 1] 81 | diff = local_pose_i_1 - local_pose_i 82 | res = np.empty(shape=(interpolate_time, ) + local_pose_i.shape) 83 | for j in range(interpolate_time): 84 | res[j] = local_pose_i + diff * j / interpolate_time 85 | result_list.append(res) 86 | return np.concatenate(result_list, axis=0) 87 | 88 | def get_relative_global_pose_list(data_path, frame_num=5, fps=25, slide_window=True): 89 | total_frame_number = frame_num 90 | relative_global_pose_seq_list = [] 91 | 92 | local_pose_seq_list = [] 93 | camera_seq_list = [] 94 | camera_matrix_seq_list = [] 95 | with open(data_path, 'rb') as f: 96 | seq_data = pickle.load(f) 97 | seq_len = len(seq_data['local_pose_list']) 98 | data_frame_rate = int(seq_data['frame_rate']) 99 | frame_rate_timer = round(data_frame_rate / fps) 100 | if slide_window is True: 101 | interval = 1 102 | else: 103 | interval = total_frame_number * frame_rate_timer 104 | for i in range(0, seq_len - total_frame_number * frame_rate_timer, interval): 105 | local_pose_seq_list.append( 106 | seq_data['local_pose_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 107 | camera_seq_list.append( 108 | seq_data['cam_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 109 | for local_pose_seq, camera_seq in zip(local_pose_seq_list, 
camera_seq_list): 110 | relative_global_pose_seq = get_relative_global_pose(local_pose_seq, camera_seq) 111 | camera_matrix_seq = [] 112 | for camera in camera_seq: 113 | camera_matrix = trans_qrot_to_matrix(camera['loc'], camera['rot']) 114 | camera_matrix_seq.append(camera_matrix) 115 | camera_matrix_seq_list.append(camera_matrix_seq) 116 | # select {frame_num} pose from relative global pose list 117 | final_relative_global_pose_seq = [] 118 | for i in range(0, len(relative_global_pose_seq)): 119 | final_relative_global_pose_seq.append(relative_global_pose_seq[i]) 120 | 121 | relative_global_pose_seq_list.append(final_relative_global_pose_seq) 122 | 123 | return np.asarray(relative_global_pose_seq_list), np.asarray(local_pose_seq_list), np.asarray(camera_matrix_seq_list) 124 | 125 | 126 | if __name__ == '__main__': 127 | source_dir = '/home/jianwang/ScanNet/static00/EgocentricAMASS' 128 | 129 | frame_num, fps, slide_window = 10, 25, True 130 | target_file_path = '/home/jianwang/ScanNet/static00/EgocentricAMASSPytorch/local_relative_global_pose_{}_{}.h5'.format(frame_num, fps) 131 | main(source_dir, target_file_path, frame_num, fps, slide_window) -------------------------------------------------------------------------------- /networks/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('..') 4 | import torch 5 | import numpy as np 6 | 7 | from config import args 8 | import datetime 9 | import os 10 | from torch.utils.data import DataLoader 11 | from tqdm import tqdm 12 | from torch.utils.tensorboard import SummaryWriter 13 | from torch.optim import Adam 14 | from pprint import pprint 15 | from models.SeqConvVAE import ConvVAE 16 | from dataset.global_dataset import AMASSDataset 17 | 18 | 19 | def prepare_log_dir(log_dir=None): 20 | # prepare log dirs 21 | if args.log_dir is None and log_dir is None: 22 | log_dir = datetime.datetime.now().strftime('%m.%d-%H:%M:%S') 23 | elif log_dir is None: 24 | log_dir = args.log_dir 25 | log_dir = os.path.join('logs', log_dir) 26 | print('making save dir at: {}'.format(log_dir)) 27 | os.makedirs(log_dir) 28 | checkpoints_dir = os.path.join(log_dir, 'checkpoints') 29 | tensorboard_dir = os.path.join(log_dir, 'tensorboard') 30 | os.makedirs(checkpoints_dir) 31 | os.makedirs(tensorboard_dir) 32 | return checkpoints_dir, tensorboard_dir 33 | 34 | 35 | class Train: 36 | def __init__(self): 37 | 38 | self.seq_length = args.seq_length 39 | self.attention = args.attention 40 | 41 | self.amass_train_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=True, 42 | windows_size=args.slide_window_step, balance_distrib=args.data_balance, 43 | with_mo2cap2_data=args.with_mo2cap2_data) 44 | self.train_dataloader = DataLoader(dataset=self.amass_train_dataset, batch_size=args.batch_size, shuffle=True, 45 | drop_last=True, num_workers=args.num_workers) 46 | 47 | self.test_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=False, 48 | windows_size=args.slide_window_step, balance_distrib=args.data_balance, 49 | with_mo2cap2_data=args.with_mo2cap2_data) 50 | self.test_dataloader = DataLoader(self.test_dataset, batch_size=args.batch_size, shuffle=False, 51 | drop_last=False, num_workers=args.num_workers) 52 | 53 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 54 | seq_len=self.seq_length) 55 | 56 | self.optimizer = Adam(params=self.network.parameters(), lr=args.learning_rate, 
weight_decay=args.weight_decay) 57 | 58 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 59 | 60 | self.network = self.network.to(self.device) 61 | 62 | self.checkpoint_dir, tensorboard_dir = prepare_log_dir() 63 | self.tensorboard_writer = SummaryWriter(tensorboard_dir) 64 | 65 | def train(self): 66 | print('---------------------Start Training-----------------------') 67 | pprint(args.__dict__) 68 | running_loss = 0 69 | running_recon_loss = 0 70 | count = 0 71 | for e in range(args.epoch): 72 | self.network.train() 73 | print('-----------------Epoch: {}-----------------'.format(str(e))) 74 | for i, relative_global_pose in tqdm(enumerate(self.train_dataloader)): 75 | self.optimizer.zero_grad() 76 | relative_global_pose = relative_global_pose.to(self.device) 77 | 78 | pose_preds, pose_input, mu, log_var = self.network(relative_global_pose) 79 | loss, recon_loss, kld_loss = self.network.loss_function(pose_preds, pose_input, mu, log_var, 80 | M_N=args.kl_weight * args.batch_size / len( 81 | self.amass_train_dataset)) 82 | loss.backward() 83 | self.optimizer.step() 84 | 85 | running_loss += loss.item() 86 | running_recon_loss += recon_loss.item() 87 | # logs 88 | if count % args.log_step == 0 and count != 0: 89 | # ...log the running loss 90 | self.tensorboard_writer.add_scalar('training loss', running_loss, count) 91 | self.tensorboard_writer.add_scalar('training recon loss', running_recon_loss, count) 92 | print("running loss is: {}".format(running_loss)) 93 | print("running recon loss is: {}".format(running_recon_loss)) 94 | running_loss = 0 95 | running_recon_loss = 0 96 | count += 1 97 | # log the logs 98 | eval_loss = self.eval() 99 | self.tensorboard_writer.add_scalar('eval loss', eval_loss, e) 100 | print('eval loss is: {}'.format(eval_loss)) 101 | 102 | torch.save({ 103 | 'epoch': e + 1, 104 | 'args': args.__dict__, 105 | 'state_dict': self.network.state_dict(), 106 | 'eval_result': eval_loss, 107 | 'optimizer': self.optimizer.state_dict(), 108 | }, os.path.join(self.checkpoint_dir, str(e) + '.pth.tar')) 109 | 110 | def eval(self): 111 | print('---------------------Start Eval-----------------------') 112 | self.network.eval() 113 | mpjpe_list = [] 114 | with torch.no_grad(): 115 | for i, relative_global_pose in tqdm(enumerate(self.test_dataloader)): 116 | relative_global_pose = relative_global_pose.to(self.device) 117 | pose_preds, pose_input, _, _ = self.network(relative_global_pose) 118 | pose_preds = pose_preds.cpu().numpy() 119 | pose_input = pose_input.cpu().numpy() 120 | pose_preds = np.reshape(pose_preds, [-1, self.seq_length, 15, 3]) 121 | pose_gt = np.reshape(pose_input, [-1, self.seq_length, 15, 3]) 122 | mpjpe_list.append(self.mpjpe(pose_preds, pose_gt)) 123 | 124 | print('MPJPE is: {}'.format(np.mean(mpjpe_list))) 125 | return np.mean(mpjpe_list) 126 | 127 | def mpjpe(self, pose_preds, pose_gt): 128 | distance = np.linalg.norm(pose_gt - pose_preds, axis=3) 129 | return np.mean(distance) 130 | 131 | 132 | if __name__ == '__main__': 133 | train = Train() 134 | train.train() 135 | -------------------------------------------------------------------------------- /utils/fisheye/FishEyeCalibrated.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | from copy import deepcopy 5 | 6 | class FishEyeCameraCalibrated: 7 | def __init__(self, calibration_file_path, use_gpu=False): 8 | with open(calibration_file_path) as f: 9 | calibration_data = 
json.load(f) 10 | self.intrinsic = np.array(calibration_data['intrinsic']) 11 | self.img_size = np.array(calibration_data['size']) # w, h 12 | self.fisheye_polynomial = np.array(calibration_data['polynomialC2W']) 13 | self.fisheye_inverse_polynomial = np.array(calibration_data['polynomialW2C']) 14 | self.img_center = np.array([self.intrinsic[0][2], self.intrinsic[1][2]]) 15 | self.use_gpu = use_gpu 16 | # self.img_center = np.array([self.img_size[0] / 2, self.img_size[1] / 2]) 17 | 18 | def camera2world(self, point: np.ndarray, depth: np.ndarray): 19 | """ 20 | point: np.ndarray of 2D points on image (n * 2) 21 | depth: np.ndarray of depth of every 2D points (n) 22 | """ 23 | depth = depth.astype(np.float) 24 | point_centered = point.astype(np.float) - self.img_center 25 | x = point_centered[:, 0] 26 | y = point_centered[:, 1] 27 | distance_from_center = np.sqrt(np.square(x) + np.square(y)) 28 | 29 | z = np.polyval(p=self.fisheye_polynomial[::-1], x=distance_from_center) 30 | point_3d = np.array([x, y, -z]) # 3, n 31 | norm = np.linalg.norm(point_3d, axis=0) 32 | point_3d = point_3d / norm * depth 33 | return point_3d.transpose() 34 | 35 | def getPolyVal(self, p, x): 36 | curVal = torch.zeros_like(x) 37 | for curValIndex in range(len(p) - 1): 38 | curVal = (curVal + p[curValIndex]) * x 39 | return curVal + p[len(p) - 1] 40 | 41 | def camera2world_pytorch(self, point: torch.Tensor, depth: torch.Tensor): 42 | """ 43 | point: np.ndarray of 2D points on image (n * 2) 44 | depth: np.ndarray of depth of every 2D points (n) 45 | """ 46 | point_centered = point.float() - self.img_center 47 | x = point_centered[:, 0] 48 | y = point_centered[:, 1] 49 | distance_from_center = torch.sqrt(torch.square(x) + torch.square(y)) 50 | 51 | z = self.getPolyVal(p=self.fisheye_polynomial[::-1], x=distance_from_center) 52 | point_3d = torch.stack([x, y, -z]).float() # 3, n 53 | norm = torch.norm(point_3d, dim=0) 54 | point_3d = point_3d / norm * depth.float() 55 | return point_3d.t() 56 | 57 | def world2camera(self, point3D): 58 | point3D = deepcopy(point3D) 59 | point3D[:, 2] = point3D[:, 2] * -1 60 | point3D = point3D.T 61 | xc, yc = self.img_center[0], self.img_center[1] 62 | point2D = [] 63 | 64 | norm = np.linalg.norm(point3D[:2], axis=0) 65 | 66 | if (norm != 0).all(): 67 | theta = np.arctan(point3D[2] / norm) 68 | invnorm = 1.0 / norm 69 | t = theta 70 | rho = self.fisheye_inverse_polynomial[0] 71 | t_i = 1.0 72 | 73 | for i in range(1, len(self.fisheye_inverse_polynomial)): 74 | t_i *= t 75 | rho += t_i * self.fisheye_inverse_polynomial[i] 76 | 77 | x = point3D[0] * invnorm * rho 78 | y = point3D[1] * invnorm * rho 79 | 80 | point2D.append(x + xc) 81 | point2D.append(y + yc) 82 | else: 83 | point2D.append(xc) 84 | point2D.append(yc) 85 | raise Exception("norm is zero!") 86 | 87 | return np.asarray(point2D).T 88 | 89 | def world2camera_with_depth(self, point3D): 90 | point3D_cloned = deepcopy(point3D) 91 | point2D = self.world2camera(point3D_cloned) 92 | 93 | depth = np.linalg.norm(point3D, axis=-1) 94 | return point2D, depth 95 | 96 | def world2camera_pytorch(self, point3d_original: torch.Tensor): 97 | fisheye_inv_polynomial = self.fisheye_inverse_polynomial 98 | point3d = point3d_original.clone() 99 | point3d[:, 2] = point3d_original[:, 2] * -1 100 | point3d = point3d.transpose(0, 1) 101 | xc, yc = self.img_center[0], self.img_center[1] 102 | xc = torch.Tensor([xc]).float().to(point3d.device) 103 | yc = torch.Tensor([yc]).float().to(point3d.device) 104 | point2d = torch.empty((2, 
point3d.shape[-1])).to(point3d.device) 105 | 106 | norm = torch.norm(point3d[:2], dim=0) 107 | 108 | if (norm != 0).all(): 109 | theta = torch.atan(point3d[2] / norm) 110 | invnorm = 1.0 / norm 111 | t = theta 112 | rho = fisheye_inv_polynomial[0] 113 | t_i = 1.0 114 | 115 | for i in range(1, len(fisheye_inv_polynomial)): 116 | t_i *= t 117 | rho += t_i * fisheye_inv_polynomial[i] 118 | 119 | x = point3d[0] * invnorm * rho 120 | y = point3d[1] * invnorm * rho 121 | 122 | point2d[0] = x + xc 123 | point2d[1] = y + yc 124 | else: 125 | point2d[0] = xc 126 | point2d[1] = yc 127 | raise Exception("norm is zero!") 128 | 129 | return point2d.transpose(0, 1) 130 | 131 | def undistort(self, point_2Ds): 132 | """ 133 | undistort the input 2d points in fisheye camera 134 | """ 135 | point_length = point_2Ds.shape[0] 136 | depths = np.ones(shape=point_length) 137 | point_3Ds = self.camera2world(point_2Ds, depths) 138 | 139 | # point_3Ds_homo = np.ones((point_length, 4)) 140 | # point_3Ds_homo[:, :3] = point_3Ds 141 | 142 | projected_2d_points = (self.intrinsic[:3, :3] @ point_3Ds.T).T 143 | projected_2d_points = projected_2d_points[:, :2] / projected_2d_points[:, 2:] 144 | return projected_2d_points 145 | 146 | 147 | 148 | 149 | if __name__ == '__main__': 150 | camera = FishEyeCameraCalibrated('fisheye.calibration.json') 151 | point = np.array([[660, 520], [520, 660], [123, 456]]) 152 | depth = np.array([30, 30, 40]) 153 | point = torch.from_numpy(point) 154 | depth = torch.from_numpy(depth) 155 | point3d = camera.camera2world_pytorch(point, depth) 156 | print(point3d) 157 | 158 | # reprojected_point_2d = camera.world2camera(point3d) 159 | # print(reprojected_point_2d) 160 | 161 | reprojected_point_2d = camera.world2camera(point3d.numpy()) 162 | print(reprojected_point_2d) 163 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/FishEyeCalibrated.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | from copy import deepcopy 5 | 6 | class FishEyeCameraCalibrated: 7 | def __init__(self, calibration_file_path, use_gpu=False): 8 | with open(calibration_file_path) as f: 9 | calibration_data = json.load(f) 10 | self.intrinsic = np.array(calibration_data['intrinsic']) 11 | self.img_size = np.array(calibration_data['size']) # w, h 12 | self.fisheye_polynomial = np.array(calibration_data['polynomialC2W']) 13 | self.fisheye_inverse_polynomial = np.array(calibration_data['polynomialW2C']) 14 | self.img_center = np.array([self.intrinsic[0][2], self.intrinsic[1][2]]) 15 | self.use_gpu = use_gpu 16 | # self.img_center = np.array([self.img_size[0] / 2, self.img_size[1] / 2]) 17 | 18 | def camera2world(self, point: np.ndarray, depth: np.ndarray): 19 | """ 20 | point: np.ndarray of 2D points on image (n * 2) 21 | depth: np.ndarray of depth of every 2D points (n) 22 | """ 23 | depth = depth.astype(np.float) 24 | point_centered = point.astype(np.float) - self.img_center 25 | x = point_centered[:, 0] 26 | y = point_centered[:, 1] 27 | distance_from_center = np.sqrt(np.square(x) + np.square(y)) 28 | 29 | z = np.polyval(p=self.fisheye_polynomial[::-1], x=distance_from_center) 30 | point_3d = np.array([x, y, -z]) # 3, n 31 | norm = np.linalg.norm(point_3d, axis=0) 32 | point_3d = point_3d / norm * depth 33 | return point_3d.transpose() 34 | 35 | def getPolyVal(self, p, x): 36 | curVal = torch.zeros_like(x) 37 | for curValIndex in range(len(p) - 1): 38 | curVal = 
(curVal + p[curValIndex]) * x 39 | return curVal + p[len(p) - 1] 40 | 41 | def camera2world_pytorch(self, point: torch.Tensor, depth: torch.Tensor): 42 | """ 43 | point: np.ndarray of 2D points on image (n * 2) 44 | depth: np.ndarray of depth of every 2D points (n) 45 | """ 46 | point_centered = point.float() - self.img_center 47 | x = point_centered[:, 0] 48 | y = point_centered[:, 1] 49 | distance_from_center = torch.sqrt(torch.square(x) + torch.square(y)) 50 | 51 | z = self.getPolyVal(p=self.fisheye_polynomial[::-1], x=distance_from_center) 52 | point_3d = torch.stack([x, y, -z]).float() # 3, n 53 | norm = torch.norm(point_3d, dim=0) 54 | point_3d = point_3d / norm * depth.float() 55 | return point_3d.t() 56 | 57 | def world2camera(self, point3D): 58 | point3D = deepcopy(point3D) 59 | point3D[:, 2] = point3D[:, 2] * -1 60 | point3D = point3D.T 61 | xc, yc = self.img_center[0], self.img_center[1] 62 | point2D = [] 63 | 64 | norm = np.linalg.norm(point3D[:2], axis=0) 65 | 66 | if (norm != 0).all(): 67 | theta = np.arctan(point3D[2] / norm) 68 | invnorm = 1.0 / norm 69 | t = theta 70 | rho = self.fisheye_inverse_polynomial[0] 71 | t_i = 1.0 72 | 73 | for i in range(1, len(self.fisheye_inverse_polynomial)): 74 | t_i *= t 75 | rho += t_i * self.fisheye_inverse_polynomial[i] 76 | 77 | x = point3D[0] * invnorm * rho 78 | y = point3D[1] * invnorm * rho 79 | 80 | point2D.append(x + xc) 81 | point2D.append(y + yc) 82 | else: 83 | point2D.append(xc) 84 | point2D.append(yc) 85 | raise Exception("norm is zero!") 86 | 87 | return np.asarray(point2D).T 88 | 89 | def world2camera_with_depth(self, point3D): 90 | point3D_cloned = deepcopy(point3D) 91 | point2D = self.world2camera(point3D_cloned) 92 | 93 | depth = np.linalg.norm(point3D, axis=-1) 94 | return point2D, depth 95 | 96 | def world2camera_pytorch(self, point3d_original: torch.Tensor): 97 | fisheye_inv_polynomial = self.fisheye_inverse_polynomial 98 | point3d = point3d_original.clone() 99 | point3d[:, 2] = point3d_original[:, 2] * -1 100 | point3d = point3d.transpose(0, 1) 101 | xc, yc = self.img_center[0], self.img_center[1] 102 | xc = torch.Tensor([xc]).float().to(point3d.device) 103 | yc = torch.Tensor([yc]).float().to(point3d.device) 104 | point2d = torch.empty((2, point3d.shape[-1])).to(point3d.device) 105 | 106 | norm = torch.norm(point3d[:2], dim=0) 107 | 108 | if (norm != 0).all(): 109 | theta = torch.atan(point3d[2] / norm) 110 | invnorm = 1.0 / norm 111 | t = theta 112 | rho = fisheye_inv_polynomial[0] 113 | t_i = 1.0 114 | 115 | for i in range(1, len(fisheye_inv_polynomial)): 116 | t_i *= t 117 | rho += t_i * fisheye_inv_polynomial[i] 118 | 119 | x = point3d[0] * invnorm * rho 120 | y = point3d[1] * invnorm * rho 121 | 122 | point2d[0] = x + xc 123 | point2d[1] = y + yc 124 | else: 125 | point2d[0] = xc 126 | point2d[1] = yc 127 | raise Exception("norm is zero!") 128 | 129 | return point2d.transpose(0, 1) 130 | 131 | def undistort(self, point_2Ds): 132 | """ 133 | undistort the input 2d points in fisheye camera 134 | """ 135 | point_length = point_2Ds.shape[0] 136 | depths = np.ones(shape=point_length) 137 | point_3Ds = self.camera2world(point_2Ds, depths) 138 | 139 | # point_3Ds_homo = np.ones((point_length, 4)) 140 | # point_3Ds_homo[:, :3] = point_3Ds 141 | 142 | projected_2d_points = (self.intrinsic[:3, :3] @ point_3Ds.T).T 143 | projected_2d_points = projected_2d_points[:, :2] / projected_2d_points[:, 2:] 144 | return projected_2d_points 145 | 146 | 147 | 148 | 149 | if __name__ == '__main__': 150 | camera = 
FishEyeCameraCalibrated('fisheye.calibration.json') 151 | point = np.array([[660, 520], [520, 660], [123, 456]]) 152 | depth = np.array([30, 30, 40]) 153 | point = torch.from_numpy(point) 154 | depth = torch.from_numpy(depth) 155 | point3d = camera.camera2world_pytorch(point, depth) 156 | print(point3d) 157 | 158 | # reprojected_point_2d = camera.world2camera(point3d) 159 | # print(reprojected_point_2d) 160 | 161 | reprojected_point_2d = camera.world2camera(point3d.numpy()) 162 | print(reprojected_point_2d) 163 | -------------------------------------------------------------------------------- /networks/interpolant.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import sys 3 | 4 | sys.path.append('..') 5 | import torch 6 | from models.SeqConvVAE import ConvVAE 7 | import pickle 8 | from utils.skeleton import Skeleton 9 | import os 10 | from tqdm import tqdm 11 | from torch.utils.data import DataLoader 12 | import numpy as np 13 | from torch.utils.data import Dataset 14 | from old_dataset import AMASSDataset 15 | from utils.utils import transform_pose 16 | from utils.rigid_transform_with_scale import umeyama 17 | 18 | 19 | def calculate_error(estimated_seq, gt_seq): 20 | estimated_seq = np.asarray(estimated_seq) 21 | gt_seq = np.asarray(gt_seq) 22 | distance = estimated_seq - gt_seq 23 | distance = np.linalg.norm(distance, axis=2) 24 | m_distance = np.mean(distance) 25 | return m_distance 26 | 27 | 28 | def global_align_skeleton_seq(estimated_seq, gt_seq): 29 | estimated_seq = np.asarray(estimated_seq).reshape((-1, 3)) 30 | gt_seq = np.asarray(gt_seq).reshape(-1, 3) 31 | # aligned_pose_list = np.zeros_like(estimated_seq) 32 | # for s in range(estimated_seq.shape[0]): 33 | # pose_p = estimated_seq[s] 34 | # pose_gt_bs = gt_seq[s] 35 | c, R, t = umeyama(estimated_seq, gt_seq) 36 | pose_p = estimated_seq.dot(R) * c + t 37 | # aligned_pose_list[s] = pose_p 38 | 39 | return pose_p.reshape((-1, 15, 3)) 40 | 41 | 42 | def align_skeleton(estimated_seq, gt_seq): 43 | estimated_seq = np.asarray(estimated_seq) 44 | gt_seq = np.asarray(gt_seq) 45 | aligned_pose_list = np.zeros_like(estimated_seq) 46 | for s in range(estimated_seq.shape[0]): 47 | pose_p = estimated_seq[s] 48 | pose_gt_bs = gt_seq[s] 49 | c, R, t = umeyama(pose_p, pose_gt_bs) 50 | pose_p = pose_p.dot(R) * c + t 51 | aligned_pose_list[s] = pose_p 52 | 53 | return aligned_pose_list 54 | 55 | class InterpolateDataset(Dataset): 56 | def __init__(self, pkl_path): 57 | with open(pkl_path, 'rb') as f: 58 | self.data = pickle.load(f) 59 | 60 | def __len__(self): 61 | return len(self.data) 62 | 63 | def __getitem__(self, i): 64 | data_i = self.data[i] 65 | data_i = data_i.reshape((-1, 45)) 66 | data_i = torch.from_numpy(data_i).float() 67 | return data_i 68 | 69 | class Interpolate: 70 | def __init__(self, pkl_path, network_path, out_path, seq_length=20, windows_size=5, latent_dim=128): 71 | self.seq_length = seq_length 72 | self.out_path = out_path 73 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=latent_dim, seq_len=self.seq_length) 74 | state_dict = torch.load(network_path)['state_dict'] 75 | self.network.load_state_dict(state_dict) 76 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 77 | self.network = self.network.to(self.device) 78 | 79 | self.skeleton_model = Skeleton(None) 80 | 81 | self.test_dataset = InterpolateDataset(pkl_path=pkl_path) 82 | 83 | def save_sample(self, sample, out_path): 84 | skeleton_list = 
self.skeleton_model.joint_list_2_mesh_list(sample) 85 | for i, skeleton in enumerate(skeleton_list): 86 | open3d.io.write_triangle_mesh(os.path.join(out_path, 'out_%04d.ply' % i), skeleton) 87 | 88 | def show_example(self, sample): 89 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 90 | coor = open3d.geometry.TriangleMesh.create_coordinate_frame() 91 | open3d.visualization.draw_geometries(skeleton_list + [coor]) 92 | 93 | 94 | def test(self, i, j, visualization=False, save=False): 95 | print('---------------------Start Eval-----------------------') 96 | self.network.eval() 97 | with torch.no_grad(): 98 | seq_i = self.test_dataset[i].unsqueeze(0).to(self.device) 99 | seq_j = self.test_dataset[j].unsqueeze(0).to(self.device) 100 | 101 | mu_i, std_i, z_i = self.network.get_latent_space(seq_i) 102 | mu_j, std_j, z_j = self.network.get_latent_space(seq_j) 103 | 104 | first_sampled_seq = self.network.decode(z_i).permute((0, 2, 1))[0] 105 | first_sampled_seq = first_sampled_seq.cpu().detach_().numpy() 106 | first_sampled_seq = first_sampled_seq.reshape((self.seq_length, 15, 3)) 107 | 108 | second_sampled_seq = self.network.decode(z_j).permute((0, 2, 1))[0] 109 | second_sampled_seq = second_sampled_seq.cpu().detach_().numpy() 110 | second_sampled_seq = second_sampled_seq.reshape((self.seq_length, 15, 3)) 111 | 112 | if visualization is True: 113 | self.show_example(first_sampled_seq) 114 | self.show_example(second_sampled_seq) 115 | if save is True: 116 | for i in range(6): 117 | out_dir = os.path.join(self.out_path, str(i)) 118 | if not os.path.isdir(out_dir): 119 | os.makedirs(out_dir) 120 | self.save_sample(first_sampled_seq, os.path.join(self.out_path, "0")) 121 | self.save_sample(second_sampled_seq, os.path.join(self.out_path, "5")) 122 | 123 | first_z = z_i[0].cpu().detach_().numpy() 124 | second_z = z_j[0].cpu().detach_().numpy() 125 | 126 | interpolant_list = [first_z + (i / 5.) 
* (second_z - first_z) for i in range(1, 5)] 127 | interpolant_list = torch.from_numpy(np.asarray(interpolant_list)).float() 128 | interpolant_list = interpolant_list.to(self.device) 129 | 130 | sampled_result = self.network.decode(interpolant_list) 131 | sampled_result = sampled_result.permute((0, 2, 1)) 132 | sampled_result = sampled_result.cpu().detach_().numpy() 133 | sampled_result = sampled_result.reshape((-1, self.seq_length, 15, 3)) 134 | for i, sample in enumerate(sampled_result): 135 | if visualization is True: 136 | self.show_example(sample) 137 | if save is True: 138 | self.save_sample(sample, os.path.join(self.out_path, str(i + 1))) 139 | 140 | 141 | if __name__ == '__main__': 142 | pkl_path = r'../AMASSDataConverter/pkl_data/data_nframe_20_slidewindow_False.pkl' 143 | sampler = Interpolate(pkl_path=pkl_path, 144 | network_path=r'\\winfs-inf\HPS\Mo2Cap2Plus1\work\BodyPoseOptimizer\networks\logs/2_full_dataset_latent_2048_len_20_slide_window_step_1_kl_0.5/checkpoints/14.pth.tar', 145 | out_path='out/interpolate2', seq_length=20, latent_dim=2048, windows_size=1) 146 | for start, end in [(1770, 1300)]: 147 | print('process: {}'.format((start, end))) 148 | sampler.out_path = './interpolate_{}_{}'.format(start, end) 149 | sampler.test(int(start), int(end), visualization=False, save=True) 150 | -------------------------------------------------------------------------------- /optimize_whole_sequence.py: -------------------------------------------------------------------------------- 1 | from optimizer import main 2 | import numpy as np 3 | from natsort import natsorted 4 | 5 | if __name__ == '__main__': 6 | import argparse 7 | import os 8 | 9 | parser = argparse.ArgumentParser(description='Data directory number') 10 | parser.add_argument('--data_path', required=True, type=str) 11 | parser.add_argument('--camera', required=False, type=str, 12 | default='utils/fisheye/fisheye.calibration.json') 13 | 14 | parser.add_argument('--vae', required=False, type=float, default=0.00) 15 | parser.add_argument('--gmm', required=False, type=float, default=0.00) 16 | parser.add_argument('--smooth', required=False, type=float, default=0.001) 17 | parser.add_argument('--bone_length', required=False, type=float, default=0.01) 18 | parser.add_argument('--weight_3d', required=False, type=float, default=0.01) 19 | parser.add_argument('--reproj_weight', required=False, type=float, default=0.01) 20 | parser.add_argument('--save', required=False, default=False, type=lambda x: (str(x).lower() == 'true')) 21 | parser.add_argument('--final_smooth', required=False, default=True, type=lambda x: (str(x).lower() == 'true')) 22 | parser.add_argument('--merge', required=False, default=True, type=lambda x: (str(x).lower() == 'true')) 23 | args = parser.parse_args() 24 | 25 | data_dir = args.data_path 26 | 27 | estimated_pose = [] 28 | optimized_pose = [] 29 | gt_pose = [] 30 | 31 | original_global_mpjpe = [] 32 | mid_global_mpjpe = [] 33 | optimized_global_mpjpe = [] 34 | original_camera_pos_error = [] 35 | optimized_camera_pos_error = [] 36 | original_aligned_camera_pos_error = [] 37 | optimized_aligned_camera_pos_error = [] 38 | original_aligned_global_mpjpe = [] 39 | aligned_mid_seq_mpjpe = [] 40 | optimized_aligned_global_mpjpe = [] 41 | aligned_original_mpjpe = [] 42 | aligned_mid_optimized_mpjpe = [] 43 | aligned_optimized_mpjpe = [] 44 | bone_length_aligned_original_mpjpe = [] 45 | bone_length_aligned_mid_optimized_mpjpe = [] 46 | bone_length_aligned_optimized_mpjpe = [] 47 | joints_error = [] 48 | for 
data_path_name in natsorted(os.listdir(data_dir)): 49 | data_path = os.path.join(data_dir, data_path_name) 50 | print("running data: {}".format(data_path)) 51 | if not os.path.isdir(data_path): 52 | continue 53 | bone_length = args.bone_length 54 | reproj_weight = args.reproj_weight 55 | res, estimated_pose_seq, mid_local_pose_seq, optimized_pose_seq, gt_pose_seq = main(data_path, 56 | camera_model_path=args.camera, 57 | vae_weight=args.vae, gmm_weight=args.gmm, 58 | smoothness_weight=args.smooth, 59 | visualization=False, save=args.save, 60 | bone_length_weight=bone_length, 61 | weight_3d=args.weight_3d, 62 | reproj_weight=reproj_weight, merge=args.merge, 63 | final_smooth=args.final_smooth) 64 | 65 | estimated_pose.extend(estimated_pose_seq) 66 | optimized_pose.extend(optimized_pose_seq) 67 | gt_pose.extend(gt_pose_seq) 68 | 69 | original_global_mpjpe.append(res['original_global_mpjpe']) 70 | mid_global_mpjpe.append(res['mid_global_mpjpe']) 71 | optimized_global_mpjpe.append(res['optimized_global_mpjpe']) 72 | original_camera_pos_error.append(res['original_camera_pos_error']) 73 | optimized_camera_pos_error.append(res['optimized_camera_pos_error']) 74 | original_aligned_camera_pos_error.append(res['original_aligned_camera_pos_error']) 75 | optimized_aligned_camera_pos_error.append(res['optimized_aligned_camera_pos_error']) 76 | original_aligned_global_mpjpe.append(res['original_aligned_global_mpjpe']) 77 | aligned_mid_seq_mpjpe.append(res['aligned_mid_seq_mpjpe']) 78 | optimized_aligned_global_mpjpe.append(res['optimized_aligned_global_mpjpe']) 79 | aligned_original_mpjpe.append(res['aligned_original_mpjpe']) 80 | aligned_mid_optimized_mpjpe.append(res['aligned_mid_optimized_mpjpe']) 81 | aligned_optimized_mpjpe.append(res['aligned_optimized_mpjpe']) 82 | bone_length_aligned_original_mpjpe.append(res['bone_length_aligned_original_mpjpe']) 83 | bone_length_aligned_mid_optimized_mpjpe.append(res['bone_length_aligned_mid_optimized_mpjpe']) 84 | bone_length_aligned_optimized_mpjpe.append(res['bone_length_aligned_optimized_mpjpe']) 85 | joints_error.append(res['joints_error']) 86 | 87 | if res['bone_length_aligned_optimized_mpjpe'] > res['bone_length_aligned_mid_optimized_mpjpe']: 88 | print(res) 89 | 90 | print('Average original global pose mpjpe: {}'.format(np.average(original_global_mpjpe))) 91 | print('Average mid global pose mpjpe: {}'.format(np.average(mid_global_mpjpe))) 92 | print('Average optimized global pose mpjpe: {}'.format(np.average(optimized_global_mpjpe))) 93 | print('-----------------------------------------') 94 | print('Average original cam pose error: {}'.format(np.average(original_camera_pos_error))) 95 | print('Average optimized cam pose error: {}'.format(np.average(optimized_camera_pos_error))) 96 | print('-----------------------------------------') 97 | print('Average original aligned cam pose error: {}'.format(np.average(original_aligned_camera_pos_error))) 98 | print('Average optimized aligned cam pose error: {}'.format(np.average(optimized_aligned_camera_pos_error))) 99 | print('-----------------------------------------') 100 | print('Average original_aligned_global_mpjpe: {}'.format(np.average(original_aligned_global_mpjpe))) 101 | print('Average aligned_mid_seq_mpjpe: {}'.format(np.average(aligned_mid_seq_mpjpe))) 102 | print('Average optimized_aligned_global_mpjpe: {}'.format(np.average(optimized_aligned_global_mpjpe))) 103 | print('-----------------------------------------') 104 | print('Average aligned original global pose mpjpe: 
{}'.format(np.average(aligned_original_mpjpe))) 105 | print('Average aligned mid local pose mpjpe: {}'.format(np.average(aligned_mid_optimized_mpjpe))) 106 | print('Average aligned optimized global pose mpjpe: {}'.format(np.average(aligned_optimized_mpjpe))) 107 | print('-----------------------------------------') 108 | print('Average bone length aligned original global pose mpjpe: {}'.format( 109 | np.average(bone_length_aligned_original_mpjpe))) 110 | print('Average bone length aligned mid local pose mpjpe: {}'.format( 111 | np.average(bone_length_aligned_mid_optimized_mpjpe))) 112 | print('Average bone length aligned optimized global pose mpjpe: {}'.format( 113 | np.average(bone_length_aligned_optimized_mpjpe))) 114 | print('-----------------------------------------') 115 | print('joints error is: {}'.format(np.mean(joints_error, axis=0))) 116 | 117 | print('-------------------------------------------------------------') 118 | -------------------------------------------------------------------------------- /networks/dataset/local_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../..') 3 | import open3d 4 | 5 | import torch 6 | from torch.utils.data.dataset import Dataset 7 | from torch.utils.data.dataloader import DataLoader 8 | from tqdm import tqdm 9 | import pickle 10 | import os 11 | from utils.utils import get_relative_global_pose_with_camera_matrix 12 | import numpy as np 13 | 14 | 15 | class AMASSDataset(Dataset): 16 | def __init__(self, data_path, frame_num, windows_size=1, is_train=True, fps=25, slide_window=True, 17 | balance_distrib=False, with_mo2cap2_data=False): 18 | """ 19 | 20 | :param data_path: directory with pkls containing data 21 | :param frame_num: length of sequence 22 | :param windows_size: one frame from n frame 23 | :param is_train: 24 | :param fps: the fps of test data 25 | """ 26 | self.windows_size = windows_size 27 | self.is_train = is_train 28 | self.slide_window = slide_window 29 | self.frame_num = frame_num 30 | self.balance_distrib = balance_distrib 31 | self.with_mo2cap2_data = with_mo2cap2_data 32 | self.data_list = self.load_pkls(data_path, is_train, self.with_mo2cap2_data, self.balance_distrib) 33 | self.local_pose_seq_list = self.get_local_pose_list(self.data_list, frame_num=self.frame_num, 34 | windows_size=self.windows_size, 35 | fps=fps) 36 | # with open(output_path, 'wb') as f: 37 | # pickle.dump(self.relative_pose_seq_list, f, protocol=4) 38 | 39 | def __getitem__(self, i): 40 | data_i = self.local_pose_seq_list[i] 41 | data_i = data_i.reshape((-1, 45)) 42 | data_i = torch.from_numpy(data_i).float() 43 | return data_i 44 | 45 | def __len__(self): 46 | return len(self.local_pose_seq_list) 47 | 48 | def load_pkls(self, path, is_train, with_mo2cap2_data=False, balance_distrib=False): 49 | data_list = [] 50 | raw_pkl_path_list = os.listdir(path) 51 | if with_mo2cap2_data is True: 52 | print('use only mo2cap2 motion!') 53 | seq_names = np.load('/HPS/ScanNet/static00/SURREAL/smpl_data/seq_names.npy').tolist() 54 | pkl_path_list = [] 55 | for seq_name in seq_names: 56 | for pkl_path in raw_pkl_path_list: 57 | if seq_name in pkl_path: 58 | pkl_path_list.append(pkl_path) 59 | else: 60 | pkl_path_list = raw_pkl_path_list 61 | print(len(pkl_path_list)) 62 | # pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'CMU' in pkl_path] 63 | if is_train: 64 | pkl_path_list = pkl_path_list[:-10] 65 | else: 66 | pkl_path_list = pkl_path_list[-10:] 67 | 68 | if balance_distrib 
is True: 69 | print('use balanced distribution') 70 | walking_pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'walk' in pkl_path.lower()] 71 | non_walking_pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'walk' not in pkl_path.lower()] 72 | np.random.shuffle(walking_pkl_path_list) 73 | walking_num = int(1 / 20 * len(non_walking_pkl_path_list)) 74 | pkl_path_list = non_walking_pkl_path_list + walking_pkl_path_list[:walking_num] 75 | for file_name in tqdm(pkl_path_list): 76 | file_path = os.path.join(path, file_name) 77 | with open(file_path, 'rb') as f: 78 | data = pickle.load(f) 79 | data_list.append(data) 80 | return data_list 81 | 82 | def get_local_pose_list(self, data_list, frame_num=10, windows_size=1, fps=30): 83 | total_frame_number = frame_num * windows_size 84 | 85 | local_pose_seq_list = [] 86 | for seq_data in tqdm(data_list): 87 | seq_len = len(seq_data['local_pose_list']) 88 | data_frame_rate = int(seq_data['frame_rate']) 89 | frame_rate_timer = round(data_frame_rate / fps) 90 | if self.slide_window is True: 91 | interval = 1 92 | else: 93 | interval = total_frame_number * frame_rate_timer 94 | for i in range(0, seq_len - total_frame_number * frame_rate_timer, interval): 95 | local_pose_seq_list.append( 96 | seq_data['local_pose_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 97 | 98 | return np.asarray(local_pose_seq_list) 99 | 100 | 101 | class Mo2Cap2Dataset(Dataset): 102 | def __init__(self, pkl_path, frame_num=5, slide_window=False): 103 | self.frame_num = frame_num 104 | with open(pkl_path, 'rb') as f: 105 | data = pickle.load(f) 106 | estimated_local_skeleton = data['estimated_local_skeleton'] 107 | gt_skeleton = data['gt_global_skeleton'] 108 | cv2world_mat_list = data['camera_pose_list'] 109 | 110 | self.estimated_local_pose_list = [] 111 | self.gt_pose_list = [] 112 | self.cam_list = [] 113 | # split to 5-frame sequence 114 | for i in tqdm(range(0, len(estimated_local_skeleton) - frame_num, frame_num)): 115 | estimated_seq = estimated_local_skeleton[i: i + self.frame_num] 116 | cam_seq = cv2world_mat_list[i: i + self.frame_num] 117 | gt_seq = gt_skeleton[i: i + self.frame_num] 118 | self.estimated_local_pose_list.append(np.asarray(estimated_seq)) 119 | self.gt_pose_list.append(np.asarray(gt_seq)) 120 | self.cam_list.append(np.asarray(cam_seq)) 121 | 122 | self.slide_window = False 123 | 124 | self.relative_global_pose_list = self.get_relative_global_pose_list(self.estimated_local_pose_list, 125 | self.cam_list) 126 | 127 | print(len(self.relative_global_pose_list)) 128 | 129 | def get_relative_global_pose_list(self, estimated_local_pose_list, cam_list): 130 | relative_global_pose_seq_list = [] 131 | 132 | for local_pose_seq, camera_seq in tqdm(zip(estimated_local_pose_list, cam_list)): 133 | relative_global_pose_seq = get_relative_global_pose_with_camera_matrix(local_pose_seq, camera_seq) 134 | 135 | # select {frame_num} pose from relative global pose list 136 | final_relative_global_pose_seq = [] 137 | for i in range(0, len(relative_global_pose_seq)): 138 | final_relative_global_pose_seq.append(relative_global_pose_seq[i]) 139 | 140 | relative_global_pose_seq_list.append(final_relative_global_pose_seq) 141 | 142 | return np.asarray(relative_global_pose_seq_list) 143 | 144 | def __getitem__(self, index): 145 | out_pose = self.relative_global_pose_list[index].reshape([-1, 45]) 146 | out_pose = torch.from_numpy(out_pose).float() 147 | 148 | out_cam = self.cam_list[index] 149 | out_cam = torch.from_numpy(out_cam).float() 150 
| 151 | out_gt = self.gt_pose_list[index] 152 | out_gt = torch.from_numpy(out_gt).float() 153 | return out_pose, out_cam, out_gt 154 | 155 | def __len__(self): 156 | return len(self.relative_global_pose_list) 157 | 158 | 159 | if __name__ == '__main__': 160 | 161 | dataset = AMASSDataset(data_path='/home/jianwang/ScanNet/static00/EgocentricAMASS', 162 | frame_num=10, windows_size=1, 163 | is_train=True) 164 | print(len(dataset)) 165 | 166 | # dataset = Mo2Cap2Dataset(pkl_path='../test_data_weipeng_studio/data_start_887_end_987/test_data.pkl') 167 | # from utils.skeleton import Skeleton 168 | # 169 | # skeleton_model = Skeleton(calibration_path=None) 170 | # d = dataset.relative_global_pose_list[0] 171 | # skeleton_list = skeleton_model.joint_list_2_mesh_list(d) 172 | # open3d.visualization.draw_geometries(skeleton_list) 173 | -------------------------------------------------------------------------------- /networks/train_local_global.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('..') 4 | import torch 5 | import numpy as np 6 | 7 | from config import args 8 | import datetime 9 | import os 10 | from torch.utils.data import DataLoader 11 | from tqdm import tqdm 12 | from torch.utils.tensorboard import SummaryWriter 13 | from torch.optim import Adam 14 | from pprint import pprint 15 | from utils.rigid_transform_with_scale import umeyama 16 | # from SeqConvVAE import ConvVAE 17 | # from SeqCCVAE import ConvVAE 18 | from models.LocalGlobalSeqVAE import LocalGlobalSeqVAE 19 | from models.SeqCVAE_attention import AttentionVAE 20 | from models.RecurrentVAE import RecurrentVAE 21 | 22 | 23 | def prepare_log_dir(log_dir=None): 24 | # prepare log dirs 25 | if args.log_dir is None and log_dir is None: 26 | log_dir = datetime.datetime.now().strftime('%m.%d-%H:%M:%S') 27 | elif log_dir is None: 28 | log_dir = args.log_dir 29 | log_dir = os.path.join('logs', log_dir) 30 | print('making save dir at: {}'.format(log_dir)) 31 | os.makedirs(log_dir) 32 | checkpoints_dir = os.path.join(log_dir, 'checkpoints') 33 | tensorboard_dir = os.path.join(log_dir, 'tensorboard') 34 | os.makedirs(checkpoints_dir) 35 | os.makedirs(tensorboard_dir) 36 | return checkpoints_dir, tensorboard_dir 37 | 38 | 39 | class Train: 40 | def __init__(self): 41 | if args.new_dataset is True: 42 | from dataset import AMASSDataset 43 | else: 44 | from old_dataset import AMASSDataset 45 | self.seq_length = args.seq_length 46 | self.attention = args.attention 47 | 48 | self.amass_train_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=True, 49 | windows_size=args.slide_window_step, balance_distrib=args.data_balance) 50 | self.train_dataloader = DataLoader(dataset=self.amass_train_dataset, batch_size=args.batch_size, shuffle=True, 51 | drop_last=True, num_workers=args.num_workers) 52 | 53 | self.test_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=False, 54 | windows_size=args.slide_window_step, balance_distrib=args.data_balance) 55 | self.test_dataloader = DataLoader(self.test_dataset, batch_size=args.batch_size, shuffle=False, 56 | drop_last=False, num_workers=args.num_workers) 57 | # self.network = AttentionVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 58 | # seq_len=self.seq_length, attention_type=self.attention) 59 | self.network = LocalGlobalSeqVAE(in_channels=45, out_channels=45, local_vae_latent_dim=args.latent_dim, 60 | 
global_vae_latent_dim=args.latent_dim, seq_len=self.seq_length) 61 | # self.network = RecurrentVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, seq_len=self.seq_length) 62 | 63 | self.optimizer = Adam(params=self.network.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay) 64 | # self.l2_loss = torch.nn.MSELoss(reduction='elementwise_mean') 65 | # self.l2_loss = torch.nn.MSELoss() 66 | 67 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 68 | 69 | self.network = self.network.to(self.device) 70 | 71 | self.checkpoint_dir, tensorboard_dir = prepare_log_dir() 72 | self.tensorboard_writer = SummaryWriter(tensorboard_dir) 73 | 74 | def train(self): 75 | print('---------------------Start Training-----------------------') 76 | pprint(args.__dict__) 77 | running_loss = 0 78 | running_local_recon_loss = 0 79 | running_global_recon_loss = 0 80 | count = 0 81 | for e in range(args.epoch): 82 | self.network.train() 83 | print('-----------------Epoch: {}-----------------'.format(str(e))) 84 | for i, (relative_global_pose, local_pose, camera_matrix) in tqdm(enumerate(self.train_dataloader)): 85 | self.optimizer.zero_grad() 86 | # relative_global_pose = relative_global_pose.to(self.device) 87 | local_pose = local_pose.to(self.device) 88 | camera_matrix = camera_matrix.to(self.device) 89 | 90 | local_pose_out, local_pose_input, mu_local, log_var_local, \ 91 | global_pose_out, global_pose_input, mu_global, log_var_global = self.network(local_pose, camera_matrix) 92 | loss, other_data = \ 93 | self.network.loss_function(local_pose_out, local_pose_input, mu_local, log_var_local, 94 | args.kl_weight * 64 / len(self.amass_train_dataset), 95 | global_pose_out, global_pose_input, mu_global, log_var_global, 96 | args.kl_weight * 64 / len(self.amass_train_dataset)) 97 | loss.backward() 98 | self.optimizer.step() 99 | 100 | 101 | 102 | running_loss += loss.item() 103 | running_local_recon_loss += other_data[1].item() 104 | running_global_recon_loss += other_data[4].item() 105 | # logs 106 | if count % args.log_step == 0 and count != 0: 107 | # ...log the running loss 108 | self.tensorboard_writer.add_scalar('training loss', running_loss, count) 109 | self.tensorboard_writer.add_scalar('training local recon loss', running_local_recon_loss, count) 110 | self.tensorboard_writer.add_scalar('training global recon loss', running_global_recon_loss, count) 111 | print("running loss is: {}".format(running_loss)) 112 | print("running local recon loss is: {}".format(running_local_recon_loss)) 113 | print("running global recon loss is: {}".format(running_global_recon_loss)) 114 | running_loss = 0 115 | running_local_recon_loss = 0 116 | running_global_recon_loss = 0 117 | count += 1 118 | # log the logs 119 | eval_loss = self.eval() 120 | self.tensorboard_writer.add_scalar('eval loss', eval_loss, e) 121 | print('eval loss is: {}'.format(eval_loss)) 122 | 123 | torch.save({ 124 | 'epoch': e + 1, 125 | 'args': args.__dict__, 126 | 'state_dict': self.network.state_dict(), 127 | 'eval_result': eval_loss, 128 | 'optimizer': self.optimizer.state_dict(), 129 | }, os.path.join(self.checkpoint_dir, str(e) + '.pth.tar')) 130 | 131 | def eval(self): 132 | print('---------------------Start Eval-----------------------') 133 | self.network.eval() 134 | mpjpe_list = [] 135 | with torch.no_grad(): 136 | for i, (relative_global_pose, local_pose, camera_matrix) in tqdm(enumerate(self.test_dataloader)): 137 | relative_global_pose = relative_global_pose.to(self.device) 138 | 
local_pose = local_pose.to(self.device) 139 | camera_matrix = camera_matrix.to(self.device) 140 | 141 | local_pose_preds, local_pose_input, mu_local, log_var_local, \ 142 | global_pose_preds, global_pose_input, mu_global, log_var_global = self.network(local_pose, camera_matrix) 143 | 144 | global_pose_preds = global_pose_preds.cpu().numpy() 145 | global_pose_input = global_pose_input.cpu().numpy() 146 | relative_global_pose = np.reshape(relative_global_pose, [-1, self.seq_length, 15, 3]) 147 | pose_gt = np.reshape(relative_global_pose, [-1, self.seq_length, 15, 3]) 148 | mpjpe_list.append(self.mpjpe(global_pose_preds, pose_gt)) 149 | 150 | print('MPJPE is: {}'.format(np.mean(mpjpe_list))) 151 | return np.mean(mpjpe_list) 152 | 153 | def mpjpe(self, pose_preds, pose_gt): 154 | distance = np.linalg.norm(pose_gt - pose_preds, axis=3) 155 | return np.mean(distance) 156 | 157 | 158 | if __name__ == '__main__': 159 | train = Train() 160 | train.train() 161 | -------------------------------------------------------------------------------- /MakeDataForOptimization/slam_reader.py: -------------------------------------------------------------------------------- 1 | # read slam results 2 | import open3d 3 | from collections import OrderedDict 4 | import numpy as np 5 | from scipy.spatial.transform import Rotation 6 | from utils.rigid_transform_with_scale import umeyama 7 | from copy import deepcopy 8 | from utils.skeleton import Skeleton 9 | 10 | 11 | class SLAMReader: 12 | def __init__(self, fps=30): 13 | self.fps = fps 14 | self.skeleton_model = Skeleton(calibration_path=None) 15 | pass 16 | 17 | def trans_qrot_to_matrix(self, trans, rot): 18 | # rot is quat 19 | rot_mat = Rotation.from_quat(rot).as_matrix() 20 | mat = np.array([ 21 | np.concatenate([rot_mat[0], [trans[0]]]), 22 | np.concatenate([rot_mat[1], [trans[1]]]), 23 | np.concatenate([rot_mat[2], [trans[2]]]), 24 | [0, 0, 0, 1] 25 | ]) 26 | return mat 27 | 28 | def trans_rotmat_to_matrix(self, trans, rot): 29 | # rot is matrix 30 | mat = np.array([ 31 | np.concatenate([rot[0], [trans[0]]]), 32 | np.concatenate([rot[1], [trans[1]]]), 33 | np.concatenate([rot[2], [trans[2]]]), 34 | [0, 0, 0, 1] 35 | ]) 36 | return mat 37 | 38 | def visualize_traj(self, slam_traj_mat, gt_traj_mat): 39 | slam_point_cloud = open3d.geometry.PointCloud() 40 | slam_point_cloud.points = open3d.utility.Vector3dVector(slam_traj_mat) 41 | slam_point_cloud.paint_uniform_color([1, 0, 0]) 42 | gt_pointcloud = open3d.geometry.PointCloud() 43 | gt_pointcloud.points = open3d.utility.Vector3dVector(gt_traj_mat) 44 | gt_pointcloud.paint_uniform_color([0, 1, 0]) 45 | 46 | # mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame() 47 | 48 | open3d.visualization.draw_geometries([gt_pointcloud, slam_point_cloud]) 49 | 50 | def read_trajectory_new(self, trajectory_path, local_pose_list, gt_global_pose, start_frame, end_frame): 51 | with open(trajectory_path, 'r') as f: 52 | frame_lines = f.readlines() 53 | 54 | # mat_trajectory = OrderedDict() 55 | # trans_and_rot = OrderedDict() 56 | mat_trajectory = [] 57 | trans_and_rot = [] 58 | for frame in frame_lines: 59 | frame_info = frame.strip().split() 60 | frame_id = round(float(frame_info[0]) * self.fps) 61 | trans = np.array(frame_info[1:4]).astype(np.float) 62 | rot = np.array(frame_info[4:]).astype(np.float) 63 | if start_frame <= frame_id < end_frame: 64 | trans_and_rot.append({"loc": trans, 'rot': rot}) 65 | 66 | # trans and rot to relative trans and rot 67 | 68 | trans_and_rot = 
self.get_relative_camera_pose_list(trans_and_rot) 69 | 70 | gt_global_skeleton_list = np.asarray(gt_global_pose) 71 | 72 | 73 | # also align bone length 74 | # for i in range(len(gt_global_pose)): 75 | # local_pose = local_pose_list[i] 76 | # global_pose = gt_global_pose[i] 77 | # c_p, R_p, t_p = umeyama(local_pose, global_pose) 78 | # local_pose_list[i] *= c_p 79 | 80 | 81 | gt_head_locs = gt_global_skeleton_list[:, 0] 82 | 83 | 84 | slam_traj_list = [] 85 | gt_traj_list = [] 86 | for i in range(len(trans_and_rot)): 87 | slam_i_trans = trans_and_rot[i]['loc'] 88 | slam_i_rot = trans_and_rot[i]['rot'] 89 | 90 | # get head locs from SLAM 91 | slam_coord = self.trans_qrot_to_matrix(slam_i_trans, slam_i_rot) 92 | local_skeleton = local_pose_list[i] 93 | local_skeleton_points = open3d.geometry.PointCloud() 94 | local_skeleton_points.points = open3d.utility.Vector3dVector(local_skeleton) 95 | local_skeleton_points = local_skeleton_points.transform(slam_coord) 96 | global_skeleton = np.asarray(local_skeleton_points.points) 97 | 98 | slam_traj_list.append(global_skeleton[0]) 99 | gt_traj_list.append(gt_head_locs[i]) 100 | slam_traj_mat = np.vstack(slam_traj_list) 101 | gt_traj_mat = np.vstack(gt_traj_list) 102 | 103 | c, R, t = umeyama(slam_traj_mat, gt_traj_mat) 104 | c_1, R_1, t_1 = umeyama(gt_traj_mat, slam_traj_mat) 105 | 106 | # self.visualize_traj(slam_traj_mat.dot(R) * c + t, gt_traj_mat) 107 | 108 | # self.show_global_pose_sequence_and_slam_trajectory(global_skeleton_list, slam_traj_mat) 109 | 110 | for i in range(len(trans_and_rot)): 111 | trans = trans_and_rot[i]['loc'] 112 | rot = trans_and_rot[i]['rot'] 113 | trans = trans * c 114 | 115 | transform_mat = self.trans_qrot_to_matrix(trans, rot) 116 | 117 | mat_trajectory.append(transform_mat) 118 | 119 | 120 | 121 | return mat_trajectory, R_1, t_1 122 | 123 | def show_global_pose_sequence_and_slam_trajectory(self, global_skeleton_list, slam_traj_mat): 124 | global_mesh = [] 125 | for skeleton in global_skeleton_list: 126 | global_mesh.append(self.skeleton_model.joints_2_mesh(skeleton)) 127 | slam_point_cloud = open3d.geometry.PointCloud() 128 | slam_point_cloud.points = open3d.utility.Vector3dVector(slam_traj_mat) 129 | slam_point_cloud.paint_uniform_color([1, 0, 0]) 130 | 131 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame() 132 | 133 | open3d.visualization.draw_geometries([slam_point_cloud, mesh_frame] + global_mesh) 134 | 135 | def trans_qrot_to_matrix(self, trans, rot): 136 | # rot is quat 137 | rot_mat = Rotation.from_quat(rot).as_matrix() 138 | mat = np.array([ 139 | np.concatenate([rot_mat[0], [trans[0]]]), 140 | np.concatenate([rot_mat[1], [trans[1]]]), 141 | np.concatenate([rot_mat[2], [trans[2]]]), 142 | [0, 0, 0, 1] 143 | ]) 144 | return mat 145 | 146 | def matrix_to_trans_qrot(self, matrix): 147 | translation = np.asarray([matrix[0][3], matrix[1][3], matrix[2][3]]) 148 | 149 | rotation_matrix = np.asarray([matrix[0][:3], matrix[1][:3], matrix[2][:3]]) 150 | 151 | qrot = Rotation.from_matrix(rotation_matrix).as_quat() 152 | 153 | return translation, qrot 154 | 155 | def get_relative_camera_pose_list(self, camera_pose_list): 156 | # firstly get relative camera pose list 157 | relative_camera_pose_list = [] 158 | camera_pose_0 = deepcopy(camera_pose_list[0]) 159 | camera_matrix_0 = self.trans_qrot_to_matrix(camera_pose_0['loc'], camera_pose_0['rot']) 160 | camera_matrix_0_inv = np.linalg.inv(camera_matrix_0) 161 | for i, camera_pose_i in enumerate(camera_pose_list): 162 | camera_matrix_i = 
self.trans_qrot_to_matrix(camera_pose_i['loc'], camera_pose_i['rot']) 163 | camera_matrix_i_to_0 = camera_matrix_0_inv.dot(camera_matrix_i) 164 | trans, qrot = self.matrix_to_trans_qrot(camera_matrix_i_to_0) 165 | relative_camera_pose_list.append({'loc': trans, 166 | 'rot': qrot}) 167 | return relative_camera_pose_list 168 | 169 | def read_trajectory(self, trajectory_path, start_frame, end_frame, scale=1): 170 | with open(trajectory_path, 'r') as f: 171 | frame_lines = f.readlines() 172 | 173 | # mat_trajectory = OrderedDict() 174 | # trans_and_rot = OrderedDict() 175 | mat_trajectory = [] 176 | trans_and_rot = [] 177 | for frame in frame_lines: 178 | frame_info = frame.strip().split() 179 | frame_id = round(float(frame_info[0]) * self.fps) 180 | trans = np.array(frame_info[1:4]).astype(np.float) 181 | rot = np.array(frame_info[4:]).astype(np.float) 182 | if start_frame <= frame_id < end_frame: 183 | trans_and_rot.append({"loc": trans, 'rot': rot}) 184 | 185 | # trans and rot to relative trans and rot 186 | 187 | trans_and_rot = self.get_relative_camera_pose_list(trans_and_rot) 188 | 189 | 190 | slam_traj_list = [] 191 | for i in range(len(trans_and_rot)): 192 | trans = trans_and_rot[i]['loc'] 193 | rot = trans_and_rot[i]['rot'] 194 | trans = trans * scale 195 | 196 | transform_mat = self.trans_qrot_to_matrix(trans, rot) 197 | 198 | mat_trajectory.append(transform_mat) 199 | 200 | return mat_trajectory 201 | -------------------------------------------------------------------------------- /networks/dataset/global_dataset.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import torch 3 | from torch.utils.data.dataset import Dataset 4 | from torch.utils.data.dataloader import DataLoader 5 | from tqdm import tqdm 6 | import pickle 7 | import os 8 | from utils.utils import get_relative_global_pose, get_relative_global_pose_with_camera_matrix 9 | import numpy as np 10 | 11 | 12 | class AMASSDataset(Dataset): 13 | def __init__(self, data_path, frame_num, windows_size=1, is_train=True, fps=25, slide_window=True, 14 | balance_distrib=False, with_mo2cap2_data=False): 15 | """ 16 | 17 | :param data_path: directory with pkls containing data 18 | :param frame_num: length of sequence 19 | :param windows_size: one frame from n frame 20 | :param is_train: 21 | :param fps: the fps of test data 22 | """ 23 | self.windows_size = windows_size 24 | self.is_train = is_train 25 | self.slide_window = slide_window 26 | self.frame_num = frame_num 27 | self.balance_distrib = balance_distrib 28 | self.with_mo2cap2_data = with_mo2cap2_data 29 | self.data_list = self.load_pkls(data_path, is_train, self.with_mo2cap2_data, self.balance_distrib) 30 | self.relative_pose_seq_list = self.get_relative_global_pose_list(self.data_list, frame_num=self.frame_num, 31 | windows_size=self.windows_size, 32 | fps=fps) 33 | 34 | def __getitem__(self, i): 35 | data_i = self.relative_pose_seq_list[i] 36 | data_i = data_i.reshape((-1, 45)) 37 | data_i = torch.from_numpy(data_i).float() 38 | return data_i 39 | 40 | def __len__(self): 41 | return len(self.relative_pose_seq_list) 42 | 43 | def load_pkls(self, path, is_train, with_mo2cap2_data=False, balance_distrib=False): 44 | data_list = [] 45 | raw_pkl_path_list = os.listdir(path) 46 | if with_mo2cap2_data is True: 47 | print('use only mo2cap2 motion!') 48 | seq_names = np.load('/HPS/ScanNet/static00/SURREAL/smpl_data/seq_names.npy').tolist() 49 | pkl_path_list = [] 50 | for seq_name in seq_names: 51 | for pkl_path in 
raw_pkl_path_list: 52 | if seq_name in pkl_path: 53 | pkl_path_list.append(pkl_path) 54 | else: 55 | pkl_path_list = raw_pkl_path_list 56 | print(len(pkl_path_list)) 57 | if is_train: 58 | pkl_path_list = pkl_path_list[:-10] 59 | else: 60 | pkl_path_list = pkl_path_list[-10:] 61 | 62 | if balance_distrib is True: 63 | print('use balanced distribution') 64 | walking_pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'walk' in pkl_path.lower()] 65 | non_walking_pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'walk' not in pkl_path.lower()] 66 | np.random.shuffle(walking_pkl_path_list) 67 | walking_num = int(1 / 20 * len(non_walking_pkl_path_list)) 68 | pkl_path_list = non_walking_pkl_path_list + walking_pkl_path_list[:walking_num] 69 | for file_name in tqdm(pkl_path_list): 70 | file_path = os.path.join(path, file_name) 71 | with open(file_path, 'rb') as f: 72 | data = pickle.load(f) 73 | data_list.append(data) 74 | return data_list 75 | 76 | def get_local_poses(self, data_list): 77 | local_pose_list = [] 78 | for data in data_list: 79 | local_pose_list.extend(data['local_pose_list']) 80 | return local_pose_list 81 | 82 | def get_relative_global_pose_list(self, data_list, frame_num=5, windows_size=5, fps=30): 83 | total_frame_number = frame_num * windows_size 84 | relative_global_pose_seq_list = [] 85 | 86 | local_pose_seq_list = [] 87 | camera_seq_list = [] 88 | for seq_data in data_list: 89 | seq_len = len(seq_data['local_pose_list']) 90 | data_frame_rate = int(seq_data['frame_rate']) 91 | frame_rate_timer = round(data_frame_rate / fps) 92 | if self.slide_window is True: 93 | interval = 1 94 | else: 95 | interval = total_frame_number * frame_rate_timer 96 | for i in range(0, seq_len - total_frame_number * frame_rate_timer, interval): 97 | local_pose_seq_list.append( 98 | seq_data['local_pose_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 99 | camera_seq_list.append( 100 | seq_data['cam_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 101 | for local_pose_seq, camera_seq in tqdm(zip(local_pose_seq_list, camera_seq_list)): 102 | relative_global_pose_seq = get_relative_global_pose(local_pose_seq, camera_seq) 103 | 104 | # select {frame_num} pose from relative global pose list 105 | final_relative_global_pose_seq = [] 106 | for i in range(0, len(relative_global_pose_seq), windows_size): 107 | final_relative_global_pose_seq.append(relative_global_pose_seq[i]) 108 | 109 | relative_global_pose_seq_list.append(final_relative_global_pose_seq) 110 | 111 | return np.asarray(relative_global_pose_seq_list) 112 | 113 | 114 | class Mo2Cap2Dataset(Dataset): 115 | def __init__(self, pkl_path, frame_num=5, slide_window=False): 116 | self.frame_num = frame_num 117 | with open(pkl_path, 'rb') as f: 118 | data = pickle.load(f) 119 | estimated_local_skeleton = data['estimated_local_skeleton'] 120 | gt_skeleton = data['gt_global_skeleton'] 121 | cv2world_mat_list = data['camera_pose_list'] 122 | 123 | self.estimated_local_pose_list = [] 124 | self.gt_pose_list = [] 125 | self.cam_list = [] 126 | # split to 5-frame sequence 127 | for i in tqdm(range(0, len(estimated_local_skeleton) - frame_num, frame_num)): 128 | estimated_seq = estimated_local_skeleton[i: i + self.frame_num] 129 | cam_seq = cv2world_mat_list[i: i + self.frame_num] 130 | gt_seq = gt_skeleton[i: i + self.frame_num] 131 | self.estimated_local_pose_list.append(np.asarray(estimated_seq)) 132 | self.gt_pose_list.append(np.asarray(gt_seq)) 133 | self.cam_list.append(np.asarray(cam_seq)) 
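# --- illustrative note (not part of the original file) -----------------------
# The chunks collected above pair each window of estimated local (camera-frame)
# poses with its 4x4 camera-to-world matrices taken from the SLAM trajectory.
# A minimal sketch of how one local pose could be lifted into world coordinates
# with such a matrix is given below; `lift_local_pose_to_world` is a hypothetical
# helper used only for illustration. The actual conversion in this repository is
# done by get_relative_global_pose_with_camera_matrix (imported from utils.utils),
# which, judging by its name, additionally expresses every frame relative to the
# first camera of the window.
def lift_local_pose_to_world(local_pose, cam2world):
    # local_pose: (15, 3) joint positions in camera coordinates
    # cam2world: (4, 4) homogeneous camera-to-world transform
    homo = np.concatenate([local_pose, np.ones((local_pose.shape[0], 1))], axis=1)
    return cam2world.dot(homo.T).T[:, :3]
# ------------------------------------------------------------------------------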
134 | 135 | self.slide_window = False 136 | 137 | self.relative_global_pose_list = self.get_relative_global_pose_list(self.estimated_local_pose_list, 138 | self.cam_list) 139 | 140 | print(len(self.relative_global_pose_list)) 141 | 142 | def get_relative_global_pose_list(self, estimated_local_pose_list, cam_list): 143 | relative_global_pose_seq_list = [] 144 | 145 | for local_pose_seq, camera_seq in tqdm(zip(estimated_local_pose_list, cam_list)): 146 | relative_global_pose_seq = get_relative_global_pose_with_camera_matrix(local_pose_seq, camera_seq) 147 | 148 | # select {frame_num} pose from relative global pose list 149 | final_relative_global_pose_seq = [] 150 | for i in range(0, len(relative_global_pose_seq)): 151 | final_relative_global_pose_seq.append(relative_global_pose_seq[i]) 152 | 153 | relative_global_pose_seq_list.append(final_relative_global_pose_seq) 154 | 155 | return np.asarray(relative_global_pose_seq_list) 156 | 157 | def __getitem__(self, index): 158 | out_pose = self.relative_global_pose_list[index].reshape([-1, 45]) 159 | out_pose = torch.from_numpy(out_pose).float() 160 | 161 | out_cam = self.cam_list[index] 162 | out_cam = torch.from_numpy(out_cam).float() 163 | 164 | out_gt = self.gt_pose_list[index] 165 | out_gt = torch.from_numpy(out_gt).float() 166 | return out_pose, out_cam, out_gt 167 | 168 | def __len__(self): 169 | return len(self.relative_global_pose_list) 170 | 171 | 172 | if __name__ == '__main__': 173 | # dataset = AMASSDataset(data_path='/home/wangjian/Develop/BodyPoseOptimization/AMASSDataConverter/pkls', 174 | # frame_num=5, windows_size=5, 175 | # is_train=False) 176 | # d = dataset[0] 177 | # print(d.shape) 178 | 179 | dataset = Mo2Cap2Dataset(pkl_path='../test_data_weipeng_studio/data_start_887_end_987/test_data.pkl') 180 | from utils.skeleton import Skeleton 181 | 182 | skeleton_model = Skeleton(calibration_path=None) 183 | d = dataset.relative_global_pose_list[0] 184 | skeleton_list = skeleton_model.joint_list_2_mesh_list(d) 185 | open3d.visualization.draw_geometries(skeleton_list) 186 | -------------------------------------------------------------------------------- /calculate_errors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from utils.rigid_transform_with_scale import umeyama 4 | from utils.skeleton import Skeleton 5 | from copy import deepcopy 6 | 7 | 8 | def global_align_skeleton_seq(estimated_seq, gt_seq): 9 | estimated_seq = deepcopy(estimated_seq) 10 | gt_seq = deepcopy(gt_seq) 11 | estimated_seq = np.asarray(estimated_seq).reshape((-1, 3)) 12 | gt_seq = np.asarray(gt_seq).reshape((-1, 3)) 13 | # aligned_pose_list = np.zeros_like(estimated_seq) 14 | # for s in range(estimated_seq.shape[0]): 15 | # pose_p = estimated_seq[s] 16 | # pose_gt_bs = gt_seq[s] 17 | c, R, t = umeyama(estimated_seq, gt_seq) 18 | pose_p = estimated_seq.dot(R) * c + t 19 | # aligned_pose_list[s] = pose_p 20 | 21 | return pose_p.reshape((-1, 15, 3)) 22 | 23 | 24 | def calculate_error(estimated_seq, gt_seq): 25 | estimated_seq = np.asarray(estimated_seq) 26 | gt_seq = np.asarray(gt_seq) 27 | distance = estimated_seq - gt_seq 28 | distance = np.linalg.norm(distance, axis=-1) 29 | m_distance = np.mean(distance) 30 | return m_distance 31 | 32 | 33 | def calculate_slam_error(estimated_seq, gt_seq, align=False): 34 | # seq shape: n_seq, 15, 3 35 | estimated_seq = np.asarray(estimated_seq) 36 | gt_seq = np.asarray(gt_seq) 37 | estimated_root_seq = (estimated_seq[:, 7, :] + estimated_seq[:, 11, :]) / 2 38 
| gt_root_seq = (gt_seq[:, 7, :] + gt_seq[:, 11, :]) / 2 39 | 40 | if align is True: 41 | c, R, t = umeyama(estimated_root_seq, gt_root_seq) 42 | estimated_root_seq = estimated_root_seq.dot(R) * c + t 43 | 44 | distance = estimated_root_seq - gt_root_seq 45 | distance = np.linalg.norm(distance, axis=1) 46 | m_distance = np.mean(distance) 47 | return m_distance 48 | 49 | def align_skeleton_size(estimated_seq, gt_seq): 50 | estimated_seq = deepcopy(np.asarray(estimated_seq)) 51 | gt_seq = deepcopy(np.asarray(gt_seq)) 52 | aligned_pose_list = np.zeros_like(estimated_seq) 53 | for s in range(estimated_seq.shape[0]): 54 | pose_p = estimated_seq[s] 55 | pose_gt_bs = gt_seq[s] 56 | c, R, t = umeyama(pose_p, pose_gt_bs) 57 | pose_p = pose_p * c 58 | aligned_pose_list[s] = pose_p 59 | 60 | return aligned_pose_list 61 | 62 | def align_skeleton(estimated_seq, gt_seq, skeleton_model=None): 63 | estimated_seq = deepcopy(np.asarray(estimated_seq)) 64 | gt_seq = deepcopy(np.asarray(gt_seq)) 65 | if skeleton_model is not None: 66 | for i in range(len(estimated_seq)): 67 | estimated_seq[i] = skeleton_model.skeleton_resize_single( 68 | estimated_seq[i], 69 | bone_length_file='utils/fisheye/mean3D.mat') 70 | for i in range(len(gt_seq)): 71 | gt_seq[i] = skeleton_model.skeleton_resize_single( 72 | gt_seq[i], 73 | bone_length_file='utils/fisheye/mean3D.mat') 74 | 75 | aligned_pose_list = np.zeros_like(estimated_seq) 76 | for s in range(estimated_seq.shape[0]): 77 | pose_p = estimated_seq[s] 78 | pose_gt_bs = gt_seq[s] 79 | c, R, t = umeyama(pose_p, pose_gt_bs) 80 | pose_p = pose_p.dot(R) * c + t 81 | aligned_pose_list[s] = pose_p 82 | 83 | return aligned_pose_list, gt_seq 84 | 85 | 86 | def align_single_skeleton(estimated_pose, gt_pose, skeleton_model=None): 87 | estimated_pose = deepcopy(np.asarray(estimated_pose)) 88 | gt_pose = deepcopy(np.asarray(gt_pose)) 89 | if skeleton_model is not None: 90 | estimated_pose = skeleton_model.skeleton_resize_single( 91 | estimated_pose, 92 | bone_length_file='utils/fisheye/mean3D.mat') 93 | gt_pose = skeleton_model.skeleton_resize_single( 94 | gt_pose, 95 | bone_length_file='utils/fisheye/mean3D.mat') 96 | 97 | 98 | 99 | c, R, t = umeyama(estimated_pose, gt_pose) 100 | pose_p = estimated_pose.dot(R) * c + t 101 | 102 | return pose_p, gt_pose 103 | 104 | 105 | def calculate_joint_error(estimated_seq, gt_seq): 106 | estimated_seq = np.asarray(estimated_seq) 107 | gt_seq = np.asarray(gt_seq) 108 | distance = estimated_seq - gt_seq 109 | distance = np.linalg.norm(distance, axis=2) 110 | joints_distance = np.mean(distance, axis=0) 111 | return joints_distance 112 | 113 | 114 | def calculate_errors(final_estimated_seq, mid_estimated_seq, final_optimized_seq=None, final_gt_seq=None): 115 | skeleton_model = Skeleton(None) 116 | original_global_mpjpe = calculate_error(final_estimated_seq, final_gt_seq) 117 | mid_global_mpjpe = calculate_error(mid_estimated_seq, final_gt_seq) 118 | optimized_global_mpjpe = calculate_error(final_optimized_seq, final_gt_seq) 119 | 120 | original_camera_pos_error = calculate_slam_error(final_estimated_seq, final_gt_seq) 121 | optimized_camera_pos_error = calculate_slam_error(final_optimized_seq, final_gt_seq) 122 | 123 | 124 | 125 | # align the estimated result and original result 126 | 127 | aligned_estimated_seq_result = global_align_skeleton_seq(final_estimated_seq, final_gt_seq) 128 | aligned_estimated_mid_seq_result = global_align_skeleton_seq(mid_estimated_seq, final_gt_seq) 129 | aligned_optimized_seq_result = 
global_align_skeleton_seq(final_optimized_seq, final_gt_seq) 130 | 131 | original_aligned_camera_pos_error = calculate_slam_error(aligned_estimated_seq_result, final_gt_seq, align=False) 132 | mid_aligned_camera_pose_error = calculate_slam_error(aligned_estimated_mid_seq_result, final_gt_seq, align=False) 133 | optimized_aligned_camera_pos_error = calculate_slam_error(aligned_optimized_seq_result, final_gt_seq, align=False) 134 | 135 | aligned_original_seq_mpjpe = calculate_error(aligned_estimated_seq_result, final_gt_seq) 136 | aligned_mid_seq_mpjpe = calculate_error(aligned_estimated_mid_seq_result, final_gt_seq) 137 | aligned_optimized_seq_mpjpe = calculate_error(aligned_optimized_seq_result, final_gt_seq) 138 | 139 | # align the estimated result and original result 140 | aligned_estimated_result, final_gt_seq = align_skeleton(final_estimated_seq, final_gt_seq, None) 141 | aligned_mid_optimized_result, final_gt_seq = align_skeleton(mid_estimated_seq, final_gt_seq, None) 142 | aligned_optimized_result, final_gt_seq = align_skeleton(final_optimized_seq, final_gt_seq, None) 143 | 144 | aligned_original_mpjpe = calculate_error(aligned_estimated_result, final_gt_seq) 145 | aligned_mid_optimized_mpjpe = calculate_error(aligned_mid_optimized_result, final_gt_seq) 146 | aligned_optimized_mpjpe = calculate_error(aligned_optimized_result, final_gt_seq) 147 | 148 | # align the estimated result and original result 149 | aligned_estimated_result, final_gt_seq = align_skeleton(final_estimated_seq, final_gt_seq, skeleton_model) 150 | aligned_mid_optimized_result, final_gt_seq = align_skeleton(mid_estimated_seq, final_gt_seq, skeleton_model) 151 | aligned_optimized_result, final_gt_seq = align_skeleton(final_optimized_seq, final_gt_seq, skeleton_model) 152 | 153 | bone_length_aligned_original_mpjpe = calculate_error(aligned_estimated_result, final_gt_seq) 154 | bone_length_aligned_mid_optimized_mpjpe = calculate_error(aligned_mid_optimized_result, final_gt_seq) 155 | bone_length_aligned_optimized_mpjpe = calculate_error(aligned_optimized_result, final_gt_seq) 156 | joints_error = calculate_joint_error(aligned_optimized_result, final_gt_seq) 157 | 158 | from collections import OrderedDict 159 | result = OrderedDict({'original_global_mpjpe': original_global_mpjpe, 160 | 'mid_global_mpjpe': mid_global_mpjpe, 161 | 'optimized_global_mpjpe': optimized_global_mpjpe, 162 | 'original_camera_pos_error': original_camera_pos_error, 163 | 'optimized_camera_pos_error': optimized_camera_pos_error, 164 | 165 | 'original_aligned_camera_pos_error': original_aligned_camera_pos_error, 166 | 'mid_aligned_camera_pose_error': mid_aligned_camera_pose_error, 167 | 'optimized_aligned_camera_pos_error': optimized_aligned_camera_pos_error, 168 | 169 | 'original_aligned_global_mpjpe': aligned_original_seq_mpjpe, 170 | "aligned_mid_seq_mpjpe": aligned_mid_seq_mpjpe, 171 | 'optimized_aligned_global_mpjpe': aligned_optimized_seq_mpjpe, 172 | 'aligned_original_mpjpe': aligned_original_mpjpe, 173 | 'aligned_mid_optimized_mpjpe': aligned_mid_optimized_mpjpe, 174 | 'aligned_optimized_mpjpe': aligned_optimized_mpjpe, 175 | 'bone_length_aligned_original_mpjpe': bone_length_aligned_original_mpjpe, 176 | 'bone_length_aligned_mid_optimized_mpjpe': bone_length_aligned_mid_optimized_mpjpe, 177 | 'bone_length_aligned_optimized_mpjpe': bone_length_aligned_optimized_mpjpe, 178 | 'joints_error': joints_error}) 179 | return result 180 | -------------------------------------------------------------------------------- 
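Note on the alignment used above: calculate_errors.py aligns estimated skeletons to ground truth with `c, R, t = umeyama(src, dst)` and applies the fit as `src.dot(R) * c + t`. The repository's implementation lives in utils/rigid_transform_with_scale.py and is not shown in this excerpt; the sketch below is a minimal reimplementation of the standard Umeyama similarity alignment following the same row-vector convention, included only to illustrate what the alignment computes (`umeyama_sketch` is a hypothetical stand-in, not the repository function).

import numpy as np

def umeyama_sketch(src, dst):
    """Return (c, R, t) such that src.dot(R) * c + t best fits dst in least squares.

    src, dst: (N, 3) arrays of corresponding points.
    """
    mu_src, mu_dst = src.mean(axis=0), dst.mean(axis=0)
    src_c, dst_c = src - mu_src, dst - mu_dst
    cov = dst_c.T.dot(src_c) / src.shape[0]        # 3x3 cross-covariance matrix
    U, D, Vt = np.linalg.svd(cov)
    S = np.eye(3)
    if np.linalg.det(U) * np.linalg.det(Vt) < 0:   # guard against reflections
        S[2, 2] = -1.0
    R_col = U.dot(S).dot(Vt)                       # rotation acting on column vectors
    var_src = (src_c ** 2).sum() / src.shape[0]    # variance of the source points
    c = np.trace(np.diag(D).dot(S)) / var_src      # optimal isotropic scale
    t = mu_dst - c * mu_src.dot(R_col.T)
    return c, R_col.T, t                           # transpose -> row-vector convention

# usage mirroring global_align_skeleton_seq / calculate_slam_error above:
# c, R, t = umeyama_sketch(estimated_points, gt_points)
# aligned = estimated_points.dot(R) * c + t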
/MakeDataForOptimization/process_test_data.py: -------------------------------------------------------------------------------- 1 | # 1. slam result + local pose = global pose 2 | # 2. save global pose result 3 | import open3d 4 | from utils.skeleton import Skeleton 5 | import os 6 | from slam_reader import SLAMReader 7 | from scipy.io import loadmat 8 | import numpy as np 9 | from scipy.spatial.transform import Rotation 10 | from tqdm import tqdm 11 | from copy import deepcopy 12 | import pickle 13 | from natsort import natsorted 14 | 15 | 16 | class TestDataPreprocessor: 17 | def __init__(self, slam_result_path, heatmap_dir, depth_dir, gt_path, start_frame, end_frame, fps, 18 | mat_start_frame): 19 | self.heatmap_dir = heatmap_dir 20 | self.depth_dir = depth_dir 21 | self.slam_result_path = slam_result_path 22 | self.slam_reader = SLAMReader(fps=fps) 23 | self.start_frame = start_frame 24 | self.end_frame = end_frame 25 | self.skeleton_model = Skeleton(calibration_path='utils/fisheye/fisheye.calibration.json') 26 | self.gt_global_skeleton = self.load_gt_data(gt_path=gt_path, start_frame=start_frame, end_frame=end_frame, 27 | mat_start_frame=mat_start_frame) 28 | self.local_pose_list, self.heatmap_list = self.get_local_pose(self.heatmap_dir, self.depth_dir, 29 | self.start_frame, self.end_frame) 30 | self.trajectory, self.R, self.t = self.slam_reader.read_trajectory_new(self.slam_result_path, 31 | self.local_pose_list, 32 | self.gt_global_skeleton, 33 | start_frame=start_frame, 34 | end_frame=end_frame) 35 | 36 | 37 | 38 | def load_gt_data(self, gt_path, start_frame, end_frame, mat_start_frame): 39 | with open(gt_path, 'rb') as f: 40 | pose_gt = pickle.load(f) 41 | clip = [] 42 | for i in range(start_frame, end_frame): 43 | clip.append(pose_gt[i - mat_start_frame]) 44 | 45 | skeleton_list = clip 46 | 47 | return skeleton_list 48 | 49 | def get_local_pose(self, heatmaps_dir, depths_dir, start_frame, end_frame): 50 | local_pose_list = [] 51 | heatmap_list = [] 52 | heatmaps_filename_list = natsorted(os.listdir(heatmaps_dir))[start_frame: end_frame] 53 | depths_filename_list = natsorted(os.listdir(depths_dir))[start_frame: end_frame] 54 | 55 | for heatmaps_filename, depths_filename in tqdm(zip(heatmaps_filename_list, depths_filename_list)): 56 | heatmap_path = os.path.join(heatmaps_dir, heatmaps_filename) 57 | depth_path = os.path.join(depths_dir, depths_filename) 58 | 59 | self.skeleton_model.set_skeleton_from_file(heatmap_file=heatmap_path, 60 | depth_file=depth_path, 61 | # bone_length_file='/home/wangjian/Develop/BodyPoseRefiner/utils/fisheye/mean3D.mat', 62 | to_mesh=False) 63 | local_skeleton = self.skeleton_model.skeleton 64 | local_pose_list.append(local_skeleton) 65 | heatmap_mat = loadmat(heatmap_path) 66 | heatmap = heatmap_mat['heatmap'] 67 | heatmap_list.append(heatmap) 68 | return local_pose_list, heatmap_list 69 | 70 | def render_body_sequence(self, visualization=False): 71 | global_skeleton_list = [] 72 | for i in range(len(self.trajectory)): 73 | slam_coord = self.trajectory[i] 74 | local_skeleton = self.local_pose_list[i] 75 | local_skeleton_points = open3d.geometry.PointCloud() 76 | local_skeleton_points.points = open3d.utility.Vector3dVector(local_skeleton) 77 | local_skeleton_points = local_skeleton_points.transform(slam_coord) 78 | # local_skeleton_points = local_skeleton_points.rotate(-Rotation.from_matrix(self.R).as_euler('xyz'), 79 | # center=False) 80 | # local_skeleton_points = local_skeleton_points.translate(-self.t) 81 | global_skeleton = 
np.asarray(local_skeleton_points.points) 82 | global_skeleton_list.append(global_skeleton) 83 | # align head to zero position 84 | # head_position = deepcopy(global_skeleton_list[0][0]) 85 | # for i in range(len(global_skeleton_list)): 86 | # global_skeleton_list[i] -= head_position 87 | if visualization: 88 | mesh_list = [] 89 | for skeleton in global_skeleton_list: 90 | self.skeleton_model.skeleton = skeleton 91 | self.skeleton_model.skeleton_to_mesh() 92 | mesh_list.append(self.skeleton_model.skeleton_mesh) 93 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=1) 94 | open3d.visualization.draw_geometries(mesh_list + [mesh_frame]) 95 | 96 | return global_skeleton_list, mesh_list 97 | else: 98 | return global_skeleton_list, [] 99 | 100 | def rotate_gt_global_skeleton(self, gt_skeleton_list, R, t): 101 | result_gt_skeleton_list = [] 102 | for i in range(len(gt_skeleton_list)): 103 | gt_skeleton = gt_skeleton_list[i] 104 | gt_skeleton_points = open3d.geometry.PointCloud() 105 | gt_skeleton_points.points = open3d.utility.Vector3dVector(gt_skeleton) 106 | gt_skeleton_points = gt_skeleton_points.rotate(-Rotation.from_matrix(R).as_euler('xyz'), 107 | center=False) 108 | gt_skeleton_points = gt_skeleton_points.translate(t) 109 | gt_skeleton = np.asarray(gt_skeleton_points.points) 110 | result_gt_skeleton_list.append(gt_skeleton) 111 | return result_gt_skeleton_list 112 | 113 | def align_estimated_and_gt_skeleton(self, estimated_skeleton_list, gt_skeleton_list): 114 | estimated_skeleton_head_position = estimated_skeleton_list[0][0] 115 | gt_head_position = gt_skeleton_list[0][0] 116 | 117 | distance = estimated_skeleton_head_position - gt_head_position 118 | 119 | distance = deepcopy(distance) 120 | for gt_pose in gt_skeleton_list: 121 | gt_pose += distance 122 | return gt_skeleton_list 123 | 124 | 125 | def main(slam_result_path, heatmap_dir, depth_dir, gt_path, 126 | start_frame, end_frame, out_dir, fps, mat_start_frame): 127 | visualizer = TestDataPreprocessor( 128 | slam_result_path=slam_result_path, 129 | heatmap_dir=heatmap_dir, 130 | depth_dir=depth_dir, 131 | gt_path=gt_path, 132 | start_frame=start_frame, 133 | end_frame=end_frame, 134 | fps=fps, 135 | mat_start_frame=mat_start_frame) 136 | 137 | # out_dir = 'corrected_data/studio-lingjie1_visualization/{}'.format(out_data_id) 138 | if not os.path.isdir(out_dir): 139 | os.makedirs(out_dir) 140 | out_path = os.path.join(out_dir, 'test_data.pkl') 141 | 142 | local_estimated_skeleton_list = visualizer.local_pose_list 143 | heatmap_list = visualizer.heatmap_list 144 | slam_result_list = visualizer.trajectory 145 | 146 | estimated_skeleton_list, estimated_mesh_list = visualizer.render_body_sequence(visualization=False) 147 | rotated_gt_global_pose_list = visualizer.gt_global_skeleton 148 | 149 | out = { 150 | "gt_global_skeleton": rotated_gt_global_pose_list, 151 | 'estimated_global_skeleton': estimated_skeleton_list, 152 | 'estimated_local_skeleton': local_estimated_skeleton_list, 153 | 'camera_pose_list': slam_result_list, 154 | "heatmap_list":heatmap_list 155 | } 156 | with open(out_path, 'wb') as f: 157 | pickle.dump(out, f) 158 | 159 | mpjpe = [] 160 | for i in range(len(rotated_gt_global_pose_list)): 161 | distance = rotated_gt_global_pose_list[i] - estimated_skeleton_list[i] 162 | distance = np.linalg.norm(distance, axis=1) 163 | mpjpe.append(np.mean(distance)) 164 | print("The initial mpjpe is: {}".format(np.mean(mpjpe))) 165 | 166 | 167 | if __name__ == '__main__': 168 | slam_result_path = 
r'data/studio-lingjie1/frame_trajectory.txt', 169 | heatmap_dir = r'\\winfs-inf\HPS\Mo2Cap2Plus1\static00\EgocentricData\REC23102020\studio-lingjie1\heatmaps', 170 | depth_dir = r'\\winfs-inf\HPS\Mo2Cap2Plus1\static00\EgocentricData\REC23102020\studio-lingjie1\depths', 171 | gt_path = 'data/studio-lingjie1/lingjie1.pkl', 172 | 173 | total_start_frame = 551 174 | total_end_frame = 3300 175 | 176 | 177 | test_size = 100 178 | for i in range(total_start_frame, total_end_frame - test_size, test_size): 179 | print("running test sequence from {} to {}".format(i, i + test_size)) 180 | start_frame = i 181 | end_frame = i + test_size 182 | out_dir = 'corrected_data/studio-lingjie1_visualization/data_start_{}_end_{}'.format(start_frame, end_frame) 183 | main(slam_result_path, heatmap_dir, depth_dir, gt_path, 184 | start_frame, end_frame, out_dir, fps=25, mat_start_frame=total_start_frame) 185 | -------------------------------------------------------------------------------- /networks/train_local.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('..') 4 | import torch 5 | import numpy as np 6 | 7 | from config import args 8 | import datetime 9 | import os 10 | from torch.utils.data import DataLoader 11 | from tqdm import tqdm 12 | from torch.utils.tensorboard import SummaryWriter 13 | from torch.optim import Adam 14 | from pprint import pprint 15 | from models.SeqConvVAE import ConvVAE 16 | 17 | 18 | def prepare_log_dir(log_dir=None): 19 | # prepare log dirs 20 | if args.log_dir is None and log_dir is None: 21 | log_dir = datetime.datetime.now().strftime('%m.%d-%H:%M:%S') 22 | elif log_dir is None: 23 | log_dir = args.log_dir 24 | log_dir = os.path.join('logs', log_dir) 25 | print('making save dir at: {}'.format(log_dir)) 26 | os.makedirs(log_dir) 27 | checkpoints_dir = os.path.join(log_dir, 'checkpoints') 28 | tensorboard_dir = os.path.join(log_dir, 'tensorboard') 29 | os.makedirs(checkpoints_dir) 30 | os.makedirs(tensorboard_dir) 31 | return checkpoints_dir, tensorboard_dir 32 | 33 | 34 | class Train: 35 | def __init__(self): 36 | from dataset.local_dataset import AMASSDataset 37 | self.seq_length = args.seq_length 38 | self.attention = args.attention 39 | 40 | self.amass_train_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=True, 41 | windows_size=args.slide_window_step, balance_distrib=args.data_balance, 42 | fps=args.fps, with_mo2cap2_data=args.with_mo2cap2_data) 43 | self.train_dataloader = DataLoader(dataset=self.amass_train_dataset, batch_size=args.batch_size, shuffle=True, 44 | drop_last=True, num_workers=args.num_workers) 45 | 46 | self.test_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=False, 47 | windows_size=args.slide_window_step, balance_distrib=args.data_balance, 48 | fps=args.fps, with_mo2cap2_data=args.with_mo2cap2_data) 49 | self.test_dataloader = DataLoader(self.test_dataset, batch_size=args.batch_size, shuffle=False, 50 | drop_last=False, num_workers=args.num_workers) 51 | if args.network == 'cnn': 52 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 53 | seq_len=self.seq_length) 54 | elif args.network == 'rnn': 55 | self.network = RecurrentVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 56 | seq_len=self.seq_length) 57 | elif args.network == 'attention': 58 | self.network = AttentionVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 59 | 
seq_len=self.seq_length, attention_type=self.attention) 60 | elif args.network == 'meva': 61 | self.network = MEVA(nx=45, t_total=self.seq_length, latent_size=args.latent_dim) 62 | 63 | elif args.network == 'vibe': 64 | self.network = RecurrentVAE_VIBE(in_channels=45, out_channels=45, seq_len=self.seq_length, 65 | latent_dim=args.latent_dim, hidden_size=args.latent_dim) 66 | elif args.network == 'mlp': 67 | from networks.models.MLP import MLPVAE 68 | self.network = MLPVAE(in_features=self.seq_length * 45, out_features=self.seq_length * 45, 69 | seq_len=self.seq_length, 70 | latent_dim=args.latent_dim) 71 | else: 72 | raise Exception('wrong network type!') 73 | 74 | self.optimizer = Adam(params=self.network.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay) 75 | # self.l2_loss = torch.nn.MSELoss(reduction='elementwise_mean') 76 | # self.l2_loss = torch.nn.MSELoss() 77 | 78 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 79 | 80 | self.network = self.network.to(self.device) 81 | 82 | self.checkpoint_dir, tensorboard_dir = prepare_log_dir() 83 | self.tensorboard_writer = SummaryWriter(tensorboard_dir) 84 | 85 | 86 | def train(self): 87 | print('---------------------Start Training-----------------------') 88 | pprint(args.__dict__) 89 | running_loss = 0 90 | running_recon_loss = 0 91 | count = 0 92 | for e in range(args.epoch): 93 | self.network.train() 94 | print('-----------------Epoch: {}-----------------'.format(str(e))) 95 | if args.new_dataset: 96 | for i, (relative_global_pose, local_pose, camera_matrix) in tqdm(enumerate(self.train_dataloader)): 97 | self.optimizer.zero_grad() 98 | # relative_global_pose = relative_global_pose.to(self.device) 99 | local_pose = local_pose.to(self.device) 100 | # camera_matrix = camera_matrix.to(self.device) 101 | 102 | pose_preds, pose_input, mu, log_var = self.network(local_pose) 103 | loss, recon_loss, kld_loss = self.network.loss_function(pose_preds, pose_input, mu, log_var, 104 | M_N=args.kl_weight * 64 / len( 105 | self.amass_train_dataset)) 106 | loss.backward() 107 | self.optimizer.step() 108 | 109 | running_loss += loss.item() 110 | running_recon_loss += recon_loss.item() 111 | # logs 112 | if count % args.log_step == 0 and count != 0: 113 | # ...log the running loss 114 | self.tensorboard_writer.add_scalar('training loss', running_loss, count) 115 | self.tensorboard_writer.add_scalar('training local recon loss', running_recon_loss, count) 116 | print("running loss is: {}".format(running_loss)) 117 | print("running local recon loss is: {}".format(running_recon_loss)) 118 | running_loss = 0 119 | running_recon_loss = 0 120 | count += 1 121 | else: 122 | for i, local_pose in tqdm(enumerate(self.train_dataloader)): 123 | self.optimizer.zero_grad() 124 | local_pose = local_pose.to(self.device) 125 | 126 | pose_preds, pose_input, mu, log_var = self.network(local_pose) 127 | loss, recon_loss, kld_loss = self.network.loss_function(pose_preds, pose_input, mu, log_var, 128 | M_N=args.kl_weight * 64 / len( 129 | self.amass_train_dataset)) 130 | loss.backward() 131 | self.optimizer.step() 132 | 133 | running_loss += loss.item() 134 | running_recon_loss += recon_loss.item() 135 | # logs 136 | if count % args.log_step == 0 and count != 0: 137 | # ...log the running loss 138 | self.tensorboard_writer.add_scalar('training loss', running_loss, count) 139 | self.tensorboard_writer.add_scalar('training local recon loss', running_recon_loss, count) 140 | print("running loss is: {}".format(running_loss)) 
141 | print("running local recon loss is: {}".format(running_recon_loss)) 142 | running_loss = 0 143 | running_recon_loss = 0 144 | count += 1 145 | # log the logs 146 | eval_loss = self.eval() 147 | self.tensorboard_writer.add_scalar('eval loss', eval_loss, e) 148 | print('eval loss is: {}'.format(eval_loss)) 149 | 150 | torch.save({ 151 | 'epoch': e + 1, 152 | 'args': args.__dict__, 153 | 'state_dict': self.network.state_dict(), 154 | 'eval_result': eval_loss, 155 | 'optimizer': self.optimizer.state_dict(), 156 | }, os.path.join(self.checkpoint_dir, str(e) + '.pth.tar')) 157 | 158 | def eval(self): 159 | print('---------------------Start Eval-----------------------') 160 | self.network.eval() 161 | mpjpe_list = [] 162 | with torch.no_grad(): 163 | if args.new_dataset: 164 | for i, (relative_global_pose, local_pose, camera_matrix) in tqdm(enumerate(self.test_dataloader)): 165 | local_pose = local_pose.to(self.device) 166 | pose_preds, pose_input, _, _ = self.network(local_pose) 167 | pose_preds = pose_preds.cpu().numpy() 168 | pose_input = pose_input.cpu().numpy() 169 | pose_preds = np.reshape(pose_preds, [-1, self.seq_length, 15, 3]) 170 | pose_gt = np.reshape(pose_input, [-1, self.seq_length, 15, 3]) 171 | mpjpe_list.append(self.mpjpe(pose_preds, pose_gt)) 172 | else: 173 | for i, local_pose in tqdm(enumerate(self.test_dataloader)): 174 | local_pose = local_pose.to(self.device) 175 | pose_preds, pose_input, _, _ = self.network(local_pose) 176 | pose_preds = pose_preds.cpu().numpy() 177 | pose_input = pose_input.cpu().numpy() 178 | pose_preds = np.reshape(pose_preds, [-1, self.seq_length, 15, 3]) 179 | pose_gt = np.reshape(pose_input, [-1, self.seq_length, 15, 3]) 180 | mpjpe_list.append(self.mpjpe(pose_preds, pose_gt)) 181 | 182 | print('MPJPE is: {}'.format(np.mean(mpjpe_list))) 183 | return np.mean(mpjpe_list) 184 | 185 | def mpjpe(self, pose_preds, pose_gt): 186 | distance = np.linalg.norm(pose_gt - pose_preds, axis=3) 187 | return np.mean(distance) 188 | 189 | 190 | if __name__ == '__main__': 191 | train = Train() 192 | train.train() 193 | -------------------------------------------------------------------------------- /networks/models/SeqConvVAE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from models.BaseVAE import BaseVAE 3 | # from networks.BaseVAE import BaseVAE 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from copy import deepcopy 7 | 8 | 9 | class ConvVAE(BaseVAE): 10 | kinematic_parents = [0, 0, 1, 2, 0, 4, 5, 1, 7, 8, 9, 4, 11, 12, 13] 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | latent_dim, 15 | seq_len, 16 | hidden_dims=None, 17 | with_bone_length=False, 18 | **kwargs) -> None: 19 | super(ConvVAE, self).__init__() 20 | 21 | self.latent_dim = latent_dim 22 | self.in_channels = in_channels 23 | self.out_channels = out_channels 24 | self.seq_len = seq_len 25 | self.with_bone_length = with_bone_length 26 | 27 | modules = [] 28 | self.encoder_hidden_dims = deepcopy(hidden_dims) 29 | if hidden_dims is None: 30 | hidden_dims = [64, 64, 128, 256, 512] 31 | 32 | # Build Encoder 33 | for h_dim in hidden_dims: 34 | modules.append( 35 | nn.Sequential( 36 | nn.Conv1d(in_channels, out_channels=h_dim, kernel_size=3, stride=1, padding=1), 37 | nn.BatchNorm1d(h_dim), 38 | nn.LeakyReLU()) 39 | ) 40 | in_channels = h_dim 41 | 42 | self.encoder = nn.Sequential(*modules) 43 | self.encoder_out_dim = hidden_dims[-1] * self.seq_len 44 | self.fc_mu = nn.Linear(self.encoder_out_dim, 
latent_dim) 45 | self.fc_var = nn.Linear(self.encoder_out_dim, latent_dim) 46 | 47 | if self.with_bone_length is True: 48 | self.bone_length_encode_layer = nn.Sequential( 49 | nn.Linear(in_features=self.seq_len * 15, out_features=512), 50 | nn.BatchNorm1d(num_features=512), 51 | nn.LeakyReLU() 52 | ) 53 | self.fusion_layer = nn.Sequential( 54 | nn.Linear(in_features=512 + hidden_dims[-1] * self.seq_len, out_features=hidden_dims[-1] * self.seq_len), 55 | nn.BatchNorm1d(num_features=hidden_dims[-1] * self.seq_len), 56 | nn.LeakyReLU() 57 | ) 58 | 59 | # Build Decoder 60 | modules = [] 61 | 62 | self.decoder_input = nn.Linear(latent_dim, hidden_dims[-1] * self.seq_len) 63 | 64 | hidden_dims.reverse() 65 | self.decoder_hidden_dims = deepcopy(hidden_dims) 66 | 67 | for i in range(len(hidden_dims) - 1): 68 | modules.append( 69 | nn.Sequential( 70 | nn.ConvTranspose1d(hidden_dims[i], 71 | hidden_dims[i + 1], 72 | kernel_size=3, 73 | stride=1, 74 | padding=1, 75 | output_padding=0), 76 | nn.BatchNorm1d(hidden_dims[i + 1]), 77 | nn.LeakyReLU()) 78 | ) 79 | 80 | self.decoder = nn.Sequential(*modules) 81 | 82 | self.final_layer = nn.Sequential( 83 | nn.ConvTranspose1d(hidden_dims[-1], 84 | hidden_dims[-1], 85 | kernel_size=3, 86 | stride=1, 87 | padding=1, 88 | output_padding=0), 89 | nn.BatchNorm1d(hidden_dims[-1]), 90 | nn.LeakyReLU(), 91 | nn.Conv1d(hidden_dims[-1], out_channels=self.out_channels, 92 | kernel_size=3, padding=1)) 93 | 94 | 95 | 96 | 97 | def encode(self, pose_input: torch.Tensor): 98 | """ 99 | Encodes the input by passing through the encoder network 100 | and returns the latent codes. 101 | :param pose_input: (Tensor) Input tensor to encoder [N x C x H x W] 102 | :return: (Tensor) List of latent codes 103 | """ 104 | result = self.encoder(pose_input) 105 | result = torch.flatten(result, start_dim=1) 106 | 107 | if self.with_bone_length is True: 108 | bone_length_encoding = self.bone_length_encoding(pose_input) 109 | result = torch.cat([result, bone_length_encoding], dim=1) 110 | result = self.fusion_layer(result) 111 | # Split the result into mu and var components 112 | # of the latent Gaussian distribution 113 | mu = self.fc_mu(result) 114 | log_var = self.fc_var(result) 115 | 116 | return [mu, log_var] 117 | 118 | def decode(self, z: torch.Tensor): 119 | """ 120 | Maps the given latent codes 121 | onto the image space. 
122 | :param z: (Tensor) [B x D] 123 | :return: (Tensor) [B x C x H x W] 124 | """ 125 | result = self.decoder_input(z) 126 | result = result.view(-1, self.decoder_hidden_dims[0], self.seq_len) 127 | result = self.decoder(result) 128 | result = self.final_layer(result) 129 | return result 130 | 131 | def decode_to_bodypose(self, z: torch.Tensor): 132 | # result: (n_batch, 45, seq_len) 133 | # output: (n_batch, seq_len, 15, 3) 134 | result = self.decoder_input(z) 135 | result = result.view(-1, self.decoder_hidden_dims[0], self.seq_len) 136 | result = self.decoder(result) 137 | result = self.final_layer(result) 138 | assert result.shape[1] == 45 139 | output = result.permute([0, 2, 1]).view((-1, self.seq_len, 15, 3)) 140 | return output 141 | 142 | def calculate_bone_length(self, skeleton: torch.Tensor): 143 | # skeleton shape: (seq_len, 15 * 3) 144 | skeleton = skeleton.view((-1, self.seq_len, 15, 3)) 145 | bone_array = skeleton - skeleton[:, :, self.kinematic_parents, :] 146 | bone_length = torch.norm(bone_array, dim=3) 147 | return bone_length 148 | 149 | def bone_length_encoding(self, pose_input): 150 | pose_input = pose_input.view(-1, self.seq_len, 15, 3) 151 | # calculate the bone length 152 | bone_length = self.calculate_bone_length(pose_input) 153 | bone_length = bone_length.view(-1, self.seq_len * 15) 154 | bone_length_encoding = self.bone_length_encode_layer(bone_length) 155 | return bone_length_encoding 156 | 157 | 158 | 159 | def reparameterize(self, mu: torch.Tensor, logvar: torch.Tensor): 160 | """ 161 | Reparameterization trick to sample from N(mu, var) from 162 | N(0,1). 163 | :param mu: (Tensor) Mean of the latent Gaussian [B x D] 164 | :param logvar: (Tensor) Standard deviation of the latent Gaussian [B x D] 165 | :return: (Tensor) [B x D] 166 | """ 167 | std = torch.exp(0.5 * logvar) 168 | eps = torch.randn_like(std) 169 | return eps * std + mu 170 | 171 | def forward(self, pose_input: torch.Tensor, **kwargs): 172 | """ 173 | :param pose_input: shape: n_batch, seq_length, 15*3 174 | :param kwargs: 175 | :return: 176 | """ 177 | pose_input_permuted = pose_input.permute((0, 2, 1)).contiguous() 178 | mu, log_var = self.encode(pose_input_permuted) 179 | z = self.reparameterize(mu, log_var) 180 | out = self.decode(z) 181 | out = out.permute((0, 2, 1)) 182 | return out, pose_input, mu, log_var 183 | 184 | def get_latent_space(self, pose_input): 185 | pose_input_permuted = pose_input.permute((0, 2, 1)).contiguous() 186 | mu, log_var = self.encode(pose_input_permuted) 187 | std = torch.exp(0.5 * log_var) 188 | z = self.reparameterize(mu, log_var) 189 | return mu, std, z 190 | 191 | def loss_function(self, 192 | *args, 193 | **kwargs): 194 | """ 195 | Computes the VAE loss function. 
196 | KL(N(\mu, \sigma), N(0, 1)) = \log \frac{1}{\sigma} + \frac{\sigma^2 + \mu^2}{2} - \frac{1}{2} 197 | :param args: 198 | :param kwargs: 199 | :return: 200 | """ 201 | recons = args[0] 202 | input = args[1] 203 | mu = args[2] 204 | log_var = args[3] 205 | 206 | if 'M_N' in kwargs: 207 | kld_weight = kwargs['M_N'] # Account for the minibatch samples from the dataset 208 | # print(kld_weight) 209 | recons_loss = F.mse_loss(recons, input) 210 | 211 | kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim=1), dim=0) 212 | else: 213 | kld_weight = kwargs['kl_weight'] 214 | recons_loss = F.mse_loss(recons, input, reduction='sum') 215 | 216 | kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim=1), dim=0) 217 | 218 | loss = recons_loss + kld_weight * kld_loss 219 | return loss, recons_loss, kld_loss 220 | 221 | def sample(self, num_samples, current_device, **kwargs): 222 | """ 223 | Samples from the latent space and return the corresponding 224 | image space map. 225 | :param num_samples: Number of samples 226 | :param current_device: Device to run the model 227 | :return: (Tensor) 228 | """ 229 | z = torch.randn(num_samples, self.latent_dim) 230 | 231 | z = z.to(current_device) 232 | 233 | samples = self.decode(z) 234 | samples = samples.permute((0, 2, 1)) 235 | return samples 236 | 237 | 238 | def generate(self, x: torch.Tensor, **kwargs): 239 | """ 240 | Given an input image x, returns the reconstructed image 241 | :param x: (Tensor) [B x C x H x W] 242 | :return: (Tensor) [B x C x H x W] 243 | """ 244 | 245 | return self.forward(x)[0] 246 | 247 | if __name__ == '__main__': 248 | convVAE = ConvVAE(in_channels=45, out_channels=45, latent_dim=128, seq_len=5, with_bone_length=True) 249 | pose = torch.zeros(4, 5, 15 * 3) 250 | # pose = pose.permute((0, 2, 1)).contiguous() 251 | result, pose_in, _, _ = convVAE(pose) 252 | print(result.shape) 253 | # print(pose_in) 254 | 255 | # sampled_result = convVAE.sample(20, torch.device('cpu')) 256 | # 257 | # print(sampled_result.shape) -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/bvh.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import re 3 | from transforms3d.euler import euler2mat, mat2euler 4 | 5 | 6 | class BvhJoint: 7 | def __init__(self, name, parent): 8 | self.name = name 9 | self.parent = parent 10 | self.offset = np.zeros(3) 11 | self.channels = [] 12 | self.children = [] 13 | 14 | def add_child(self, child): 15 | self.children.append(child) 16 | 17 | def __repr__(self): 18 | return self.name 19 | 20 | def position_animated(self): 21 | return any([x.endswith('position') for x in self.channels]) 22 | 23 | def rotation_animated(self): 24 | return any([x.endswith('rotation') for x in self.channels]) 25 | 26 | 27 | class Bvh: 28 | def __init__(self): 29 | self.joints = {} 30 | self.root = None 31 | self.keyframes = None 32 | self.frames = 0 33 | self.fps = 0 34 | 35 | def _parse_hierarchy(self, text): 36 | lines = re.split('\\s*\\n+\\s*', text) 37 | 38 | joint_stack = [] 39 | 40 | for line in lines: 41 | words = re.split('\\s+', line) 42 | instruction = words[0] 43 | 44 | if instruction == "JOINT" or instruction == "ROOT": 45 | parent = joint_stack[-1] if instruction == "JOINT" else None 46 | joint = BvhJoint(words[1], parent) 47 | self.joints[joint.name] = joint 48 | if parent: 49 | parent.add_child(joint) 50 | joint_stack.append(joint) 51 | if 
instruction == "ROOT": 52 | self.root = joint 53 | elif instruction == "CHANNELS": 54 | for i in range(2, len(words)): 55 | joint_stack[-1].channels.append(words[i]) 56 | elif instruction == "OFFSET": 57 | for i in range(1, len(words)): 58 | joint_stack[-1].offset[i - 1] = float(words[i]) 59 | elif instruction == "End": 60 | joint = BvhJoint(joint_stack[-1].name + "_end", joint_stack[-1]) 61 | joint_stack[-1].add_child(joint) 62 | joint_stack.append(joint) 63 | self.joints[joint.name] = joint 64 | elif instruction == '}': 65 | joint_stack.pop() 66 | 67 | def _add_pose_recursive(self, joint, offset, poses): 68 | pose = joint.offset + offset 69 | poses.append(pose) 70 | 71 | for c in joint.children: 72 | self._add_pose_recursive(c, pose, poses) 73 | 74 | def plot_hierarchy(self): 75 | import matplotlib.pyplot as plt 76 | from mpl_toolkits.mplot3d import axes3d, Axes3D 77 | poses = [] 78 | self._add_pose_recursive(self.root, np.zeros(3), poses) 79 | pos = np.array(poses) 80 | 81 | fig = plt.figure() 82 | ax = fig.add_subplot(111, projection='3d') 83 | ax.scatter(pos[:, 0], pos[:, 2], pos[:, 1]) 84 | ax.set_xlim(-30, 30) 85 | ax.set_ylim(-30, 30) 86 | ax.set_zlim(-30, 30) 87 | plt.show() 88 | 89 | def parse_motion(self, text): 90 | lines = re.split('\\s*\\n+\\s*', text) 91 | 92 | frame = 0 93 | for line in lines: 94 | if line == '': 95 | continue 96 | words = re.split('\\s+', line) 97 | 98 | if line.startswith("Frame Time:"): 99 | self.fps = round(1 / float(words[2])) 100 | continue 101 | if line.startswith("Frames:"): 102 | self.frames = int(words[1]) 103 | continue 104 | 105 | if self.keyframes is None: 106 | self.keyframes = np.empty((self.frames, len(words)), dtype=np.float32) 107 | 108 | for angle_index in range(len(words)): 109 | self.keyframes[frame, angle_index] = float(words[angle_index]) 110 | 111 | frame += 1 112 | 113 | def parse_string(self, text): 114 | hierarchy, motion = text.split("MOTION") 115 | self._parse_hierarchy(hierarchy) 116 | self.parse_motion(motion) 117 | 118 | def _extract_rotation(self, frame_pose, index_offset, joint): 119 | local_rotation = np.zeros(3) 120 | for channel in joint.channels: 121 | if channel.endswith("position"): 122 | continue 123 | if channel == "Xrotation": 124 | local_rotation[0] = frame_pose[index_offset] 125 | elif channel == "Yrotation": 126 | local_rotation[1] = frame_pose[index_offset] 127 | elif channel == "Zrotation": 128 | local_rotation[2] = frame_pose[index_offset] 129 | else: 130 | raise Exception(f"Unknown channel {channel}") 131 | index_offset += 1 132 | 133 | local_rotation = np.deg2rad(local_rotation) 134 | M_rotation = np.eye(3) 135 | for channel in joint.channels: 136 | if channel.endswith("position"): 137 | continue 138 | 139 | if channel == "Xrotation": 140 | euler_rot = np.array([local_rotation[0], 0., 0.]) 141 | elif channel == "Yrotation": 142 | euler_rot = np.array([0., local_rotation[1], 0.]) 143 | elif channel == "Zrotation": 144 | euler_rot = np.array([0., 0., local_rotation[2]]) 145 | else: 146 | raise Exception(f"Unknown channel {channel}") 147 | 148 | M_channel = euler2mat(*euler_rot) 149 | M_rotation = M_rotation.dot(M_channel) 150 | 151 | return M_rotation, index_offset 152 | 153 | def _extract_position(self, joint, frame_pose, index_offset): 154 | offset_position = np.zeros(3) 155 | for channel in joint.channels: 156 | if channel.endswith("rotation"): 157 | continue 158 | if channel == "Xposition": 159 | offset_position[0] = frame_pose[index_offset] 160 | elif channel == "Yposition": 161 | offset_position[1] = 
frame_pose[index_offset] 162 | elif channel == "Zposition": 163 | offset_position[2] = frame_pose[index_offset] 164 | else: 165 | raise Exception(f"Unknown channel {channel}") 166 | index_offset += 1 167 | 168 | return offset_position, index_offset 169 | 170 | def _recursive_apply_frame(self, joint, frame_pose, index_offset, p, r, M_parent, p_parent): 171 | if joint.position_animated(): 172 | offset_position, index_offset = self._extract_position(joint, frame_pose, index_offset) 173 | else: 174 | offset_position = np.zeros(3) 175 | 176 | if len(joint.channels) == 0: 177 | joint_index = list(self.joints.values()).index(joint) 178 | p[joint_index] = p_parent + M_parent.dot(joint.offset) 179 | r[joint_index] = mat2euler(M_parent) 180 | return index_offset 181 | 182 | if joint.rotation_animated(): 183 | M_rotation, index_offset = self._extract_rotation(frame_pose, index_offset, joint) 184 | else: 185 | M_rotation = np.eye(3) 186 | 187 | M = M_parent.dot(M_rotation) 188 | position = p_parent + M_parent.dot(joint.offset) + offset_position 189 | 190 | rotation = np.rad2deg(mat2euler(M)) 191 | joint_index = list(self.joints.values()).index(joint) 192 | p[joint_index] = position 193 | r[joint_index] = rotation 194 | 195 | for c in joint.children: 196 | index_offset = self._recursive_apply_frame(c, frame_pose, index_offset, p, r, M, position) 197 | 198 | return index_offset 199 | 200 | def frame_pose(self, frame): 201 | p = np.empty((len(self.joints), 3)) 202 | r = np.empty((len(self.joints), 3)) 203 | frame_pose = self.keyframes[frame] 204 | M_parent = np.zeros((3, 3)) 205 | M_parent[0, 0] = 1 206 | M_parent[1, 1] = 1 207 | M_parent[2, 2] = 1 208 | self._recursive_apply_frame(self.root, frame_pose, 0, p, r, M_parent, np.zeros(3)) 209 | 210 | return p, r 211 | 212 | def all_frame_poses(self): 213 | p = np.empty((self.frames, len(self.joints), 3)) 214 | r = np.empty((self.frames, len(self.joints), 3)) 215 | 216 | for frame in range(len(self.keyframes)): 217 | p[frame], r[frame] = self.frame_pose(frame) 218 | 219 | return p, r 220 | 221 | def _plot_pose(self, p, r, fig=None, ax=None): 222 | import matplotlib.pyplot as plt 223 | from mpl_toolkits.mplot3d import axes3d, Axes3D 224 | if fig is None: 225 | fig = plt.figure() 226 | if ax is None: 227 | ax = fig.add_subplot(111, projection='3d') 228 | 229 | ax.cla() 230 | ax.scatter(p[:, 0], p[:, 2], p[:, 1]) 231 | ax.set_xlim(-30, 30) 232 | ax.set_ylim(-30, 30) 233 | ax.set_zlim(-1, 59) 234 | 235 | plt.draw() 236 | plt.pause(0.001) 237 | 238 | def plot_frame(self, frame, fig=None, ax=None): 239 | p, r = self.frame_pose(frame) 240 | self._plot_pose(p, r, fig, ax) 241 | 242 | def joint_names(self): 243 | return self.joints.keys() 244 | 245 | def parse_file(self, path): 246 | with open(path, 'r') as f: 247 | self.parse_string(f.read()) 248 | 249 | def plot_all_frames(self): 250 | import matplotlib.pyplot as plt 251 | from mpl_toolkits.mplot3d import axes3d, Axes3D 252 | fig = plt.figure() 253 | ax = fig.add_subplot(111, projection='3d') 254 | for i in range(self.frames): 255 | self.plot_frame(i, fig, ax) 256 | 257 | def __repr__(self): 258 | return f"BVH {len(self.joints.keys())} joints, {self.frames} frames" 259 | 260 | 261 | if __name__ == '__main__': 262 | # create Bvh parser 263 | anim = Bvh() 264 | # parser file 265 | anim.parse_file("example.bvh") 266 | 267 | # draw the skeleton in T-pose 268 | anim.plot_hierarchy() 269 | 270 | # extract single frame pose: axis0=joint, axis1=positionXYZ/rotationXYZ 271 | p, r = anim.frame_pose(0) 272 | 273 | # extract 
all poses: axis0=frame, axis1=joint, axis2=positionXYZ/rotationXYZ 274 | all_p, all_r = anim.all_frame_poses() 275 | 276 | # print all joints, their positions and orientations 277 | for _p, _r, _j in zip(p, r, anim.joint_names()): 278 | print(f"{_j}: p={_p}, r={_r}") 279 | 280 | # draw the skeleton for the given frame 281 | anim.plot_frame(22) 282 | 283 | # show full animation 284 | anim.plot_all_frames() 285 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import shutil 4 | import numpy as np 5 | from scipy.spatial.transform import Rotation 6 | from copy import deepcopy 7 | import torch 8 | import cv2 9 | 10 | 11 | lines = [(0, 1, 'right'), (0, 4, 'left'), (1, 2, 'right'), (2, 3, 'right'), (4, 5, 'left'), (5, 6, 'left'), (1, 7, 'right'), (4, 11, 'left'), (7, 8, 'right'), (8, 9, 'right'), (9, 10, 'right'), 12 | (11, 12, 'left'), (12, 13, 'left'), (13, 14, 'left'), (7, 11, 'left')] 13 | 14 | def draw_joints(joints, img, color=(0, 255, 0), right_color=(255, 0, 0)): 15 | joints_num = joints.shape[0] 16 | for line in lines: 17 | if line[0] < joints_num and line[1] < joints_num: 18 | start = joints[line[0]].astype(int) 19 | end = joints[line[1]].astype(int) 20 | left_or_right = line[2] 21 | if left_or_right == 'right': 22 | paint_color = right_color 23 | else: 24 | paint_color = color 25 | img = cv2.line(img, (start[0], start[1]), (end[0], end[1]), color=paint_color, thickness=4) 26 | for j in range(joints_num): 27 | img = cv2.circle(img, center=(joints[j][0].astype(int), joints[j][1].astype(int)), 28 | radius=2, color=(0, 0, 255), thickness=6) 29 | 30 | return img 31 | 32 | 33 | def trans_qrot_to_matrix(trans, rot): 34 | # rot is quat 35 | rot_mat = Rotation.from_quat(rot).as_matrix() 36 | mat = np.array([ 37 | np.concatenate([rot_mat[0], [trans[0]]]), 38 | np.concatenate([rot_mat[1], [trans[1]]]), 39 | np.concatenate([rot_mat[2], [trans[2]]]), 40 | [0, 0, 0, 1] 41 | ]) 42 | return mat 43 | 44 | def transformation_matrix_to_translation_and_rotation(mat): 45 | rotation_matrix = mat[:3, :3] 46 | translation = mat[:3, 3] 47 | 48 | # rotation matrix to rotation euler 49 | rotation_euler = Rotation.from_matrix(rotation_matrix).as_euler('xyz') 50 | return rotation_euler, translation 51 | 52 | 53 | def local_skeleton_2_global_skeleton(local_pose_list, cam_2_world_mat): 54 | pass 55 | 56 | def global_skeleton_2_local_skeleton(global_pose, world_2_cam_mat): 57 | global_pose_homo = np.concatenate([global_pose, np.ones((global_pose.shape[0], 1))], axis=1) 58 | local_pose_homo = world_2_cam_mat.dot(global_pose_homo.T).T 59 | return local_pose_homo 60 | 61 | 62 | def transform_pose(pose, matrix): 63 | pose_homo = np.concatenate([pose, np.ones(shape=(pose.shape[0], 1))], axis=1) 64 | new_pose_homo = matrix.dot(pose_homo.T).T 65 | new_pose = new_pose_homo[:, :3] 66 | return new_pose 67 | 68 | def transform_pose_torch(pose, matrix): 69 | to_attach = torch.ones(size=(pose.shape[0], 1)).to(pose.device) 70 | pose_homo = torch.cat([pose, to_attach], dim=1) 71 | new_pose_homo = matrix.mm(pose_homo.T).T 72 | new_pose = new_pose_homo[:, :3] 73 | return new_pose 74 | 75 | def get_concecutive_global_cam(cam_seq, last_cam): 76 | camera_matrix_0 = deepcopy(cam_seq[0]) 77 | concecutive_global_cam = np.empty_like(cam_seq) 78 | camera_matrix_0_inv = np.linalg.inv(camera_matrix_0) 79 | for i, camera_pose_i in enumerate(cam_seq): 80 | camera_matrix_i = camera_pose_i 81
| camera_matrix_i_2_last = last_cam.dot(camera_matrix_0_inv.dot(camera_matrix_i)) 82 | concecutive_global_cam[i] = camera_matrix_i_2_last 83 | return concecutive_global_cam 84 | 85 | def get_relative_global_pose(local_pose_list, camera_pose_list): 86 | # firstly get relative camera pose list 87 | relative_pose_list = [] 88 | camera_pose_0 = deepcopy(camera_pose_list[0]) 89 | camera_matrix_0 = trans_qrot_to_matrix(camera_pose_0['loc'], camera_pose_0['rot']) 90 | camera_matrix_0_inv = np.linalg.inv(camera_matrix_0) 91 | for i, camera_pose_i in enumerate(camera_pose_list): 92 | camera_matrix_i = trans_qrot_to_matrix(camera_pose_i['loc'], camera_pose_i['rot']) 93 | camera_matrix_i_2_0 = camera_matrix_0_inv.dot(camera_matrix_i) 94 | local_pose = local_pose_list[i] 95 | transformed_local_pose = transform_pose(local_pose, camera_matrix_i_2_0) 96 | relative_pose_list.append(transformed_local_pose) 97 | return relative_pose_list 98 | 99 | def get_relative_global_pose_with_camera_matrix(local_pose_list, camera_pose_list): 100 | # firstly get relative camera pose list 101 | relative_pose_list = [] 102 | camera_pose_0 = deepcopy(camera_pose_list[0]) 103 | camera_matrix_0 = camera_pose_0 104 | camera_matrix_0_inv = np.linalg.inv(camera_matrix_0) 105 | for i, camera_pose_i in enumerate(camera_pose_list): 106 | camera_matrix_i = camera_pose_i 107 | camera_matrix_i_2_0 = camera_matrix_0_inv.dot(camera_matrix_i) 108 | local_pose = local_pose_list[i] 109 | transformed_local_pose = transform_pose(local_pose, camera_matrix_i_2_0) 110 | relative_pose_list.append(transformed_local_pose) 111 | 112 | return np.asarray(relative_pose_list) 113 | 114 | def get_global_pose_from_relative_global_pose(relative_global_pose_list, initial_camera_matrix): 115 | global_pose_list = np.zeros_like(relative_global_pose_list) 116 | for i, relative_global_pose in enumerate(relative_global_pose_list): 117 | global_pose = transform_pose(relative_global_pose, initial_camera_matrix) 118 | global_pose_list[i] = global_pose 119 | return global_pose_list 120 | 121 | def get_relative_camera_matrix(camera_pose_1, camera_pose_2): 122 | camera_matrix_1_inv = torch.inverse(camera_pose_1) 123 | camera_matrix_2_to_1 = camera_matrix_1_inv @ camera_pose_2 124 | return camera_matrix_2_to_1 125 | 126 | def get_relative_global_pose_with_camera_matrix_torch(local_pose_list, camera_pose_list): 127 | # firstly get relative camera pose list 128 | # relative_pose_list = [] 129 | relative_pose_list = torch.zeros_like(local_pose_list) 130 | camera_pose_0 = camera_pose_list[0].detach().clone() 131 | camera_matrix_0 = camera_pose_0 132 | camera_matrix_0_inv = torch.inverse(camera_matrix_0) 133 | for i, camera_pose_i in enumerate(camera_pose_list): 134 | camera_matrix_i = camera_pose_i 135 | camera_matrix_i_2_0 = camera_matrix_0_inv.mm(camera_matrix_i) 136 | local_pose = local_pose_list[i] 137 | transformed_local_pose = transform_pose_torch(local_pose, camera_matrix_i_2_0) 138 | relative_pose_list[i] = transformed_local_pose 139 | return relative_pose_list 140 | 141 | def get_relative_transform_from_blender(location1, rotation1, location2, rotation2): 142 | # input: location and rotation in blender coordinate 143 | # out: rotation, translation and transform matrix in OpenCV coordinate 144 | T_world2cv1, R_world2cv1, mat_world2cv1 = get_cv_rt_from_blender(location1, rotation1) 145 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_blender(location2, rotation2) 146 | 147 | mat_cv1_2world = np.linalg.inv(mat_world2cv1) 148 | 149 | mat_cv1_to_cv2 = 
mat_cv1_2world.dot(mat_world2cv2) 150 | # mat cv1 to cv2 is the coordinate transformation, we need to change it to the object transformation 151 | mat_cv2_to_cv1 = np.linalg.inv(mat_cv1_to_cv2) 152 | 153 | rotation, translation = transformation_matrix_to_translation_and_rotation(mat_cv2_to_cv1) 154 | return rotation, translation, mat_cv2_to_cv1 155 | 156 | 157 | def get_transform_relative_to_base_cv(base_location, base_rotation, location, rotation): 158 | # input: location and rotation in blender coordinate 159 | # out: rotation, translation and transform matrix in OpenCV coordinate 160 | T_world2cv_base, R_world2cv_base, mat_world2cv_base = get_cv_rt_from_cv(base_location, base_rotation) 161 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_cv(location, rotation) 162 | 163 | # mat_cv2world2 = np.linalg.inv(mat_world2cv2) 164 | # location_cv_homo = mat_cv2world2[:, 3] 165 | 166 | location_cv_homo = np.concatenate([location, np.ones(1)]) 167 | 168 | R_cv2_2_base = R_world2cv2.T.dot(R_world2cv_base) 169 | new_rotation_euler = Rotation.from_matrix(R_cv2_2_base).as_euler(seq='xyz') 170 | 171 | new_location_homo = mat_world2cv_base.dot(location_cv_homo) 172 | new_location = new_location_homo[:3] 173 | 174 | return new_location, new_rotation_euler 175 | 176 | def get_transform_relative_to_base_blender(base_location, base_rotation, location, rotation): 177 | T_world2cv_base, R_world2cv_base, mat_world2cv_base = get_cv_rt_from_blender(base_location, base_rotation) 178 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_blender(location, rotation) 179 | 180 | location_cv_homo = np.concatenate([location, np.ones(1)]) 181 | 182 | R_cv2_2_base = R_world2cv2.T.dot(R_world2cv_base) 183 | new_rotation_euler = Rotation.from_matrix(R_cv2_2_base).as_euler(seq='xyz') 184 | 185 | new_location_homo = mat_world2cv_base.dot(location_cv_homo) 186 | new_location = new_location_homo[:3] 187 | 188 | return new_location, new_rotation_euler 189 | 190 | # code modified from zaw lin 191 | def get_cv_rt_from_blender(location, rotation): 192 | # bcam stands for blender camera 193 | R_bcam2cv = np.array( 194 | [[1, 0, 0], 195 | [0, -1, 0], 196 | [0, 0, -1]]) 197 | 198 | # Transpose since the rotation is object rotation, 199 | # and we want coordinate rotation 200 | # R_world2bcam = cam.rotation_euler.to_matrix().transposed() 201 | # T_world2bcam = -1*R_world2bcam * location 202 | # 203 | # Use matrix_world instead to account for all constraints 204 | 205 | R_world2bcam = Rotation.from_euler('xyz', rotation, degrees=False).as_matrix().T 206 | 207 | # Convert camera location to translation vector used in coordinate changes 208 | # T_world2bcam = -1*R_world2bcam*cam.location 209 | # Use location from matrix_world to account for constraints: 210 | T_world2bcam = -1 * R_world2bcam.dot(location) 211 | 212 | # Build the coordinate transform matrix from world to computer vision camera 213 | R_world2cv = R_bcam2cv.dot(R_world2bcam) 214 | T_world2cv = R_bcam2cv.dot(T_world2bcam) 215 | 216 | #put into 3x4 matrix 217 | mat = np.array([ 218 | np.concatenate([R_world2cv[0], [T_world2cv[0]]]), 219 | np.concatenate([R_world2cv[1], [T_world2cv[1]]]), 220 | np.concatenate([R_world2cv[2], [T_world2cv[2]]]), 221 | [0, 0, 0, 1] 222 | ]) 223 | return T_world2cv, R_world2cv, mat 224 | 225 | # code modified from zaw lin 226 | def get_cv_rt_from_cv(location, rotation): 227 | 228 | # Transpose since the rotation is object rotation, 229 | # and we want coordinate rotation 230 | # R_world2bcam = 
cam.rotation_euler.to_matrix().transposed() 231 | # T_world2bcam = -1*R_world2bcam * location 232 | # 233 | # Use matrix_world instead to account for all constraints 234 | 235 | R_world2cv = Rotation.from_euler('xyz', rotation, degrees=False).as_matrix().T 236 | 237 | # Convert camera location to translation vector used in coordinate changes 238 | # T_world2bcam = -1*R_world2bcam*cam.location 239 | # Use location from matrix_world to account for constraints: 240 | T_world2cv = -1 * R_world2cv.dot(location) 241 | 242 | #put into 3x4 matrix 243 | mat = np.array([ 244 | np.concatenate([R_world2cv[0], [T_world2cv[0]]]), 245 | np.concatenate([R_world2cv[1], [T_world2cv[1]]]), 246 | np.concatenate([R_world2cv[2], [T_world2cv[2]]]), 247 | [0, 0, 0, 1] 248 | ]) 249 | return T_world2cv, R_world2cv, mat -------------------------------------------------------------------------------- /utils/skeleton.py: -------------------------------------------------------------------------------- 1 | # pose visualizer 2 | # 1. read and generate 3D skeleton from heat map and depth 3 | # 2. convert 3D skeleton to skeleton mesh 4 | from utils.fisheye.FishEyeEquisolid import FishEyeCameraEquisolid 5 | from utils.fisheye.FishEyeCalibrated import FishEyeCameraCalibrated 6 | import numpy as np 7 | import open3d 8 | from utils.pose_visualization_utils import get_cylinder, get_sphere 9 | from scipy.io import loadmat 10 | import cv2 11 | import os 12 | from tqdm import tqdm 13 | from scipy.ndimage.filters import gaussian_filter1d 14 | 15 | 16 | class Skeleton: 17 | heatmap_sequence = ["Neck", "Right_shoulder", "Right_elbow", "Right_wrist", "Left_shoulder", "Left_elbow", 18 | "Left_wrist", "Right_hip", "Right_knee", "Right_ankle", "Right_foot", "Left_hip", 19 | "Left_knee", "Left_ankle", "Left_foot"] 20 | lines = [(0, 1), (0, 4), (1, 2), (2, 3), (4, 5), (5, 6), (1, 7), (4, 11), (7, 8), (8, 9), (9, 10), 21 | (11, 12), (12, 13), (13, 14), (7, 11)] 22 | kinematic_parents = [0, 0, 1, 2, 0, 4, 5, 1, 7, 8, 9, 4, 11, 12, 13] 23 | 24 | def __init__(self, calibration_path): 25 | 26 | self.skeleton = None 27 | self.skeleton_mesh = None 28 | if calibration_path is None: 29 | self.camera = FishEyeCameraEquisolid(focal_length=9, sensor_size=32, img_size=(1280, 1024)) 30 | else: 31 | self.camera = FishEyeCameraCalibrated(calibration_file_path=calibration_path) 32 | 33 | def set_skeleton(self, heatmap, depth, bone_length=None, to_mesh=True): 34 | heatmap = np.expand_dims(heatmap, axis=0) 35 | preds, _ = self.get_max_preds(heatmap) 36 | pred = preds[0] 37 | 38 | points_3d = self.camera.camera2world(pred, depth) 39 | # print('------------------------') 40 | # print(self.camera.camera2world(np.array([[640, 1000]]), np.array([1]))) 41 | if bone_length is not None: 42 | points_3d = self._skeleton_resize(points_3d, bone_length) 43 | self.skeleton = points_3d 44 | if to_mesh: 45 | self.skeleton_to_mesh() 46 | return self.skeleton 47 | 48 | def joints_2_mesh(self, joints_3d): 49 | self.skeleton = joints_3d 50 | self.skeleton_to_mesh() 51 | skeleton_mesh = self.skeleton_mesh 52 | self.skeleton_mesh = None 53 | self.skeleton = None 54 | return skeleton_mesh 55 | 56 | def joint_list_2_mesh_list(self, joints_3d_list): 57 | mesh_list = [] 58 | for joints_3d in joints_3d_list: 59 | mesh_list.append(self.joints_2_mesh(joints_3d)) 60 | return mesh_list 61 | 62 | def get_skeleton_mesh(self): 63 | if self.skeleton_mesh is None: 64 | raise Exception("Skeleton is not prepared.") 65 | else: 66 | return self.skeleton_mesh 67 | 68 | def save_skeleton_mesh(self, 
out_path): 69 | if self.skeleton_mesh is None: 70 | raise Exception("Skeleton is not prepared.") 71 | else: 72 | open3d.io.write_triangle_mesh(out_path, mesh=self.skeleton_mesh) 73 | 74 | def set_skeleton_from_file(self, heatmap_file, depth_file, bone_length_file=None, to_mesh=True): 75 | # load the average bone length 76 | if bone_length_file is not None: 77 | bone_length_mat = loadmat(bone_length_file) 78 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 79 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 80 | bone_length = np.linalg.norm(bones_mean, axis=1) 81 | else: 82 | bone_length = None 83 | heatmap_mat = loadmat(heatmap_file) 84 | depth_mat = loadmat(depth_file) 85 | depth = depth_mat['depth'][0] 86 | heatmap = heatmap_mat['heatmap'] 87 | heatmap = cv2.resize(heatmap, dsize=(1024, 1024), interpolation=cv2.INTER_NEAREST) 88 | heatmap = np.pad(heatmap, ((0, 0), (128, 128), (0, 0)), 'constant', constant_values=0) 89 | heatmap = heatmap.transpose((2, 0, 1)) 90 | return self.set_skeleton(heatmap, depth, bone_length, to_mesh) 91 | 92 | def skeleton_resize_seq(self, joint_list, bone_length_file): 93 | bone_length_mat = loadmat(bone_length_file) 94 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 95 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 96 | bone_length = np.linalg.norm(bones_mean, axis=1) 97 | 98 | for i in range(len(joint_list)): 99 | joint_list[i] = self._skeleton_resize(joint_list[i], bone_length) 100 | return joint_list 101 | 102 | def skeleton_resize_single(self, joint, bone_length_file): 103 | bone_length_mat = loadmat(bone_length_file) 104 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 105 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 106 | bone_length = np.linalg.norm(bones_mean, axis=1) 107 | 108 | joint = self._skeleton_resize(joint, bone_length) 109 | return joint 110 | 111 | def skeleton_resize_standard_skeleton(self, joint_input, joint_standard): 112 | """ 113 | 114 | :param joint_input: input joint shape: 15 * 3 115 | :param joint_standard: standard joint shape: 15 * 3 116 | :return: 117 | """ 118 | bones_mean = joint_standard - joint_standard[self.kinematic_parents, :] 119 | bone_length = np.linalg.norm(bones_mean, axis=1) * 1000. 120 | 121 | joint = self._skeleton_resize(joint_input, bone_length) 122 | return joint 123 | 124 | def _skeleton_resize(self, points_3d, bone_length): 125 | # resize the skeleton to the normal size (why we should do that?) 
126 | estimated_bone_vec = points_3d - points_3d[self.kinematic_parents, :] 127 | estimated_bone_length = np.linalg.norm(estimated_bone_vec, axis=1) 128 | multi = bone_length[1:] / estimated_bone_length[1:] 129 | multi = np.concatenate(([0], multi)) 130 | multi = np.stack([multi] * 3, axis=1) 131 | resized_bones_vec = estimated_bone_vec * multi / 1000 132 | 133 | joints_rescaled = points_3d 134 | for i in range(joints_rescaled.shape[0]): 135 | joints_rescaled[i, :] = joints_rescaled[self.kinematic_parents[i], :] + resized_bones_vec[i, :] 136 | return joints_rescaled 137 | 138 | def render(self): 139 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=1) 140 | open3d.visualization.draw_geometries([self.skeleton_mesh, mesh_frame]) 141 | 142 | def skeleton_to_mesh(self): 143 | final_mesh = open3d.geometry.TriangleMesh() 144 | for i in range(len(self.skeleton)): 145 | keypoint_mesh = get_sphere(position=self.skeleton[i], radius=0.02) 146 | final_mesh = final_mesh + keypoint_mesh 147 | 148 | for line in self.lines: 149 | line_start_i = line[0] 150 | line_end_i = line[1] 151 | 152 | start_point = self.skeleton[line_start_i] 153 | end_point = self.skeleton[line_end_i] 154 | 155 | line_mesh = get_cylinder(start_point, end_point, radius=0.005) 156 | final_mesh += line_mesh 157 | self.skeleton_mesh = final_mesh 158 | return final_mesh 159 | 160 | def smooth(self, pose_sequence, sigma): 161 | """ 162 | gaussian smooth pose 163 | :param pose_sequence_2d: pose sequence, input is a list with every element is 15 * 2 body pose 164 | :param kernel_size: kernel size of guassian smooth 165 | :return: smoothed 2d pose 166 | """ 167 | pose_sequence = np.asarray(pose_sequence) 168 | pose_sequence_result = np.zeros_like(pose_sequence) 169 | keypoint_num = pose_sequence.shape[1] 170 | for i in range(keypoint_num): 171 | pose_sequence_i = pose_sequence[:, i, :] 172 | pose_sequence_filtered = gaussian_filter1d(pose_sequence_i, sigma, axis=0) 173 | pose_sequence_result[:, i, :] = pose_sequence_filtered 174 | return pose_sequence_result 175 | 176 | def get_max_preds(self, batch_heatmaps): 177 | ''' 178 | get predictions from score maps 179 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 180 | ''' 181 | assert isinstance(batch_heatmaps, np.ndarray), \ 182 | 'batch_heatmaps should be numpy.ndarray' 183 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 184 | 185 | batch_size = batch_heatmaps.shape[0] 186 | num_joints = batch_heatmaps.shape[1] 187 | width = batch_heatmaps.shape[3] 188 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 189 | idx = np.argmax(heatmaps_reshaped, 2) 190 | maxvals = np.amax(heatmaps_reshaped, 2) 191 | 192 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 193 | idx = idx.reshape((batch_size, num_joints, 1)) 194 | 195 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 196 | 197 | preds[:, :, 0] = (preds[:, :, 0]) % width 198 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 199 | 200 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 201 | pred_mask = pred_mask.astype(np.float32) 202 | 203 | preds *= pred_mask 204 | return preds, maxvals 205 | 206 | 207 | if __name__ == '__main__': 208 | skeleton = Skeleton( 209 | calibration_path='/home/wangjian/Develop/egocentricvisualization/pose/fisheye/fisheye.calibration.json') 210 | data_path = r'/home/wangjian/Develop/egocentricvisualization/data_2' 211 | heatmap_dir = os.path.join(data_path, 'heatmaps') 212 | depth_dir = os.path.join(data_path, 
'depths') 213 | out_dir = os.path.join(data_path, 'smooth_skeleton_mesh') 214 | if not os.path.isdir(out_dir): 215 | os.mkdir(out_dir) 216 | skeleon_list = [] 217 | out_path_list = [] 218 | for heatmap_name in tqdm(sorted(os.listdir(heatmap_dir))): 219 | heatmap_path = os.path.join(heatmap_dir, heatmap_name) 220 | mat_id = heatmap_name 221 | depth_path = os.path.join(depth_dir, mat_id) 222 | 223 | skeleton_array = skeleton.set_skeleton_from_file(heatmap_path, 224 | depth_path, 225 | # bone_length_file=r'/home/wangjian/Develop/egocentricvisualization/pose/fisheye/mean3D.mat', 226 | to_mesh=False) 227 | out_path = os.path.join(out_dir, mat_id + ".ply") 228 | skeleon_list.append(skeleton_array) 229 | out_path_list.append(out_path) 230 | 231 | smoothed_skeleton = skeleton.smooth(skeleon_list, sigma=1) 232 | print("saving to ply") 233 | for i in tqdm(range(len(smoothed_skeleton))): 234 | skeleton.skeleton = smoothed_skeleton[i] 235 | skeleton.skeleton_to_mesh() 236 | skeleton.save_skeleton_mesh(out_path_list[i]) 237 | 238 | # skeleton.set_skeleton_from_file(r'X:\Mo2Cap2Plus\static00\Datasets\Mo2Cap2\ego_system_test\sitting\heatmaps\img-04052020001910-937.mat', 239 | # r'X:\Mo2Cap2Plus\static00\Datasets\Mo2Cap2\ego_system_test\sitting\depths\img-04052020001910-937.mat', 240 | # # bone_length_file=r'F:\Develop\EgocentricSystemVisualization\pose\fisheye\mean3D.mat') 241 | # ) 242 | # 243 | # skeleton.render() 244 | # print(skeleton.skeleton) 245 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/skeleton.py: -------------------------------------------------------------------------------- 1 | # pose visualizer 2 | # 1. read and generate 3D skeleton from heat map and depth 3 | # 2. convert 3D skeleton to skeleton mesh 4 | from utils.fisheye.FishEyeEquisolid import FishEyeCameraEquisolid 5 | from utils.fisheye.FishEyeCalibrated import FishEyeCameraCalibrated 6 | import numpy as np 7 | import open3d 8 | from utils.pose_visualization_utils import get_cylinder, get_sphere 9 | from scipy.io import loadmat 10 | import cv2 11 | import os 12 | from tqdm import tqdm 13 | from scipy.ndimage.filters import gaussian_filter1d 14 | 15 | 16 | class Skeleton: 17 | heatmap_sequence = ["Neck", "Right_shoulder", "Right_elbow", "Right_wrist", "Left_shoulder", "Left_elbow", 18 | "Left_wrist", "Right_hip", "Right_knee", "Right_ankle", "Right_foot", "Left_hip", 19 | "Left_knee", "Left_ankle", "Left_foot"] 20 | lines = [(0, 1), (0, 4), (1, 2), (2, 3), (4, 5), (5, 6), (1, 7), (4, 11), (7, 8), (8, 9), (9, 10), 21 | (11, 12), (12, 13), (13, 14), (7, 11)] 22 | kinematic_parents = [0, 0, 1, 2, 0, 4, 5, 1, 7, 8, 9, 4, 11, 12, 13] 23 | 24 | def __init__(self, calibration_path): 25 | 26 | self.skeleton = None 27 | self.skeleton_mesh = None 28 | if calibration_path is None: 29 | self.camera = FishEyeCameraEquisolid(focal_length=9, sensor_size=32, img_size=(1280, 1024)) 30 | else: 31 | self.camera = FishEyeCameraCalibrated(calibration_file_path=calibration_path) 32 | 33 | def set_skeleton(self, heatmap, depth, bone_length=None, to_mesh=True): 34 | heatmap = np.expand_dims(heatmap, axis=0) 35 | preds, _ = self.get_max_preds(heatmap) 36 | pred = preds[0] 37 | 38 | points_3d = self.camera.camera2world(pred, depth) 39 | # print('------------------------') 40 | # print(self.camera.camera2world(np.array([[640, 1000]]), np.array([1]))) 41 | if bone_length is not None: 42 | points_3d = self._skeleton_resize(points_3d, bone_length) 43 | self.skeleton = points_3d 44 | if to_mesh: 
45 | self.skeleton_to_mesh() 46 | return self.skeleton 47 | 48 | def joints_2_mesh(self, joints_3d): 49 | self.skeleton = joints_3d 50 | self.skeleton_to_mesh() 51 | skeleton_mesh = self.skeleton_mesh 52 | self.skeleton_mesh = None 53 | self.skeleton = None 54 | return skeleton_mesh 55 | 56 | def joint_list_2_mesh_list(self, joints_3d_list): 57 | mesh_list = [] 58 | for joints_3d in joints_3d_list: 59 | mesh_list.append(self.joints_2_mesh(joints_3d)) 60 | return mesh_list 61 | 62 | def get_skeleton_mesh(self): 63 | if self.skeleton_mesh is None: 64 | raise Exception("Skeleton is not prepared.") 65 | else: 66 | return self.skeleton_mesh 67 | 68 | def save_skeleton_mesh(self, out_path): 69 | if self.skeleton_mesh is None: 70 | raise Exception("Skeleton is not prepared.") 71 | else: 72 | open3d.io.write_triangle_mesh(out_path, mesh=self.skeleton_mesh) 73 | 74 | def set_skeleton_from_file(self, heatmap_file, depth_file, bone_length_file=None, to_mesh=True): 75 | # load the average bone length 76 | if bone_length_file is not None: 77 | bone_length_mat = loadmat(bone_length_file) 78 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 79 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 80 | bone_length = np.linalg.norm(bones_mean, axis=1) 81 | else: 82 | bone_length = None 83 | heatmap_mat = loadmat(heatmap_file) 84 | depth_mat = loadmat(depth_file) 85 | depth = depth_mat['depth'][0] 86 | heatmap = heatmap_mat['heatmap'] 87 | heatmap = cv2.resize(heatmap, dsize=(1024, 1024), interpolation=cv2.INTER_NEAREST) 88 | heatmap = np.pad(heatmap, ((0, 0), (128, 128), (0, 0)), 'constant', constant_values=0) 89 | heatmap = heatmap.transpose((2, 0, 1)) 90 | return self.set_skeleton(heatmap, depth, bone_length, to_mesh) 91 | 92 | def skeleton_resize_seq(self, joint_list, bone_length_file): 93 | bone_length_mat = loadmat(bone_length_file) 94 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 95 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 96 | bone_length = np.linalg.norm(bones_mean, axis=1) 97 | 98 | for i in range(len(joint_list)): 99 | joint_list[i] = self._skeleton_resize(joint_list[i], bone_length) 100 | return joint_list 101 | 102 | def skeleton_resize_single(self, joint, bone_length_file): 103 | bone_length_mat = loadmat(bone_length_file) 104 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 105 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 106 | bone_length = np.linalg.norm(bones_mean, axis=1) 107 | 108 | joint = self._skeleton_resize(joint, bone_length) 109 | return joint 110 | 111 | def skeleton_resize_standard_skeleton(self, joint_input, joint_standard): 112 | """ 113 | 114 | :param joint_input: input joint shape: 15 * 3 115 | :param joint_standard: standard joint shape: 15 * 3 116 | :return: 117 | """ 118 | bones_mean = joint_standard - joint_standard[self.kinematic_parents, :] 119 | bone_length = np.linalg.norm(bones_mean, axis=1) * 1000. 120 | 121 | joint = self._skeleton_resize(joint_input, bone_length) 122 | return joint 123 | 124 | def _skeleton_resize(self, points_3d, bone_length): 125 | # resize the skeleton to the normal size (why we should do that?) 
126 | estimated_bone_vec = points_3d - points_3d[self.kinematic_parents, :] 127 | estimated_bone_length = np.linalg.norm(estimated_bone_vec, axis=1) 128 | multi = bone_length[1:] / estimated_bone_length[1:] 129 | multi = np.concatenate(([0], multi)) 130 | multi = np.stack([multi] * 3, axis=1) 131 | resized_bones_vec = estimated_bone_vec * multi / 1000 132 | 133 | joints_rescaled = points_3d 134 | for i in range(joints_rescaled.shape[0]): 135 | joints_rescaled[i, :] = joints_rescaled[self.kinematic_parents[i], :] + resized_bones_vec[i, :] 136 | return joints_rescaled 137 | 138 | def render(self): 139 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=1) 140 | open3d.visualization.draw_geometries([self.skeleton_mesh, mesh_frame]) 141 | 142 | def skeleton_to_mesh(self): 143 | final_mesh = open3d.geometry.TriangleMesh() 144 | for i in range(len(self.skeleton)): 145 | keypoint_mesh = get_sphere(position=self.skeleton[i], radius=0.02) 146 | final_mesh = final_mesh + keypoint_mesh 147 | 148 | for line in self.lines: 149 | line_start_i = line[0] 150 | line_end_i = line[1] 151 | 152 | start_point = self.skeleton[line_start_i] 153 | end_point = self.skeleton[line_end_i] 154 | 155 | line_mesh = get_cylinder(start_point, end_point, radius=0.005) 156 | final_mesh += line_mesh 157 | self.skeleton_mesh = final_mesh 158 | return final_mesh 159 | 160 | def smooth(self, pose_sequence, sigma): 161 | """ 162 | gaussian smooth pose 163 | :param pose_sequence_2d: pose sequence, input is a list with every element is 15 * 2 body pose 164 | :param kernel_size: kernel size of guassian smooth 165 | :return: smoothed 2d pose 166 | """ 167 | pose_sequence = np.asarray(pose_sequence) 168 | pose_sequence_result = np.zeros_like(pose_sequence) 169 | keypoint_num = pose_sequence.shape[1] 170 | for i in range(keypoint_num): 171 | pose_sequence_i = pose_sequence[:, i, :] 172 | pose_sequence_filtered = gaussian_filter1d(pose_sequence_i, sigma, axis=0) 173 | pose_sequence_result[:, i, :] = pose_sequence_filtered 174 | return pose_sequence_result 175 | 176 | def get_max_preds(self, batch_heatmaps): 177 | ''' 178 | get predictions from score maps 179 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 180 | ''' 181 | assert isinstance(batch_heatmaps, np.ndarray), \ 182 | 'batch_heatmaps should be numpy.ndarray' 183 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 184 | 185 | batch_size = batch_heatmaps.shape[0] 186 | num_joints = batch_heatmaps.shape[1] 187 | width = batch_heatmaps.shape[3] 188 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 189 | idx = np.argmax(heatmaps_reshaped, 2) 190 | maxvals = np.amax(heatmaps_reshaped, 2) 191 | 192 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 193 | idx = idx.reshape((batch_size, num_joints, 1)) 194 | 195 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 196 | 197 | preds[:, :, 0] = (preds[:, :, 0]) % width 198 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 199 | 200 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 201 | pred_mask = pred_mask.astype(np.float32) 202 | 203 | preds *= pred_mask 204 | return preds, maxvals 205 | 206 | 207 | if __name__ == '__main__': 208 | skeleton = Skeleton( 209 | calibration_path='/home/wangjian/Develop/egocentricvisualization/pose/fisheye/fisheye.calibration.json') 210 | data_path = r'/home/wangjian/Develop/egocentricvisualization/data_2' 211 | heatmap_dir = os.path.join(data_path, 'heatmaps') 212 | depth_dir = os.path.join(data_path, 
'depths') 213 | out_dir = os.path.join(data_path, 'smooth_skeleton_mesh') 214 | if not os.path.isdir(out_dir): 215 | os.mkdir(out_dir) 216 | skeleon_list = [] 217 | out_path_list = [] 218 | for heatmap_name in tqdm(sorted(os.listdir(heatmap_dir))): 219 | heatmap_path = os.path.join(heatmap_dir, heatmap_name) 220 | mat_id = heatmap_name 221 | depth_path = os.path.join(depth_dir, mat_id) 222 | 223 | skeleton_array = skeleton.set_skeleton_from_file(heatmap_path, 224 | depth_path, 225 | # bone_length_file=r'/home/wangjian/Develop/egocentricvisualization/pose/fisheye/mean3D.mat', 226 | to_mesh=False) 227 | out_path = os.path.join(out_dir, mat_id + ".ply") 228 | skeleon_list.append(skeleton_array) 229 | out_path_list.append(out_path) 230 | 231 | smoothed_skeleton = skeleton.smooth(skeleon_list, sigma=1) 232 | print("saving to ply") 233 | for i in tqdm(range(len(smoothed_skeleton))): 234 | skeleton.skeleton = smoothed_skeleton[i] 235 | skeleton.skeleton_to_mesh() 236 | skeleton.save_skeleton_mesh(out_path_list[i]) 237 | 238 | # skeleton.set_skeleton_from_file(r'X:\Mo2Cap2Plus\static00\Datasets\Mo2Cap2\ego_system_test\sitting\heatmaps\img-04052020001910-937.mat', 239 | # r'X:\Mo2Cap2Plus\static00\Datasets\Mo2Cap2\ego_system_test\sitting\depths\img-04052020001910-937.mat', 240 | # # bone_length_file=r'F:\Develop\EgocentricSystemVisualization\pose\fisheye\mean3D.mat') 241 | # ) 242 | # 243 | # skeleton.render() 244 | # print(skeleton.skeleton) 245 | --------------------------------------------------------------------------------
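Not part of the repository: a minimal usage sketch for the npybvh parser above. It assumes the MakeDataForOptimization directory is on PYTHONPATH and that a BVH file exists at the hypothetical path "example.bvh"; the joint name "Head" is only an example and depends on the skeleton stored in the file. The joint axis of the returned arrays follows anim.joint_names(), which is what the indexing relies on.

import numpy as np
from bvh_reader.npybvh.bvh import Bvh

anim = Bvh()
anim.parse_file("example.bvh")

# all_frame_poses() returns positions and rotations, each of shape (frames, joints, 3);
# the joint axis is ordered exactly like anim.joint_names().
positions, rotations = anim.all_frame_poses()
joint_order = list(anim.joint_names())

# World-space trajectory of a single joint over the whole clip.
joint_name = "Head" if "Head" in joint_order else joint_order[0]
joint_index = joint_order.index(joint_name)
trajectory = positions[:, joint_index, :]
print(joint_name, trajectory.shape)  # (frames, 3)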
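Not part of the repository: a round-trip sketch for the pose-transform helpers in utils/utils.py, assuming the repository root is on PYTHONPATH and its dependencies (numpy, scipy, torch, cv2) are installed. Camera poses are taken to be 4x4 camera-to-world matrices, which is what get_relative_global_pose_with_camera_matrix and get_global_pose_from_relative_global_pose operate on; the poses and camera trajectory below are synthetic placeholders.

import numpy as np
from scipy.spatial.transform import Rotation

from utils.utils import (
    get_relative_global_pose_with_camera_matrix,
    get_global_pose_from_relative_global_pose,
)

def random_camera_matrix(rng):
    # Random rigid 4x4 camera-to-world transform.
    mat = np.eye(4)
    mat[:3, :3] = Rotation.from_euler('xyz', rng.uniform(-np.pi, np.pi, size=3)).as_matrix()
    mat[:3, 3] = rng.uniform(-1.0, 1.0, size=3)
    return mat

rng = np.random.default_rng(0)
seq_len, n_joints = 10, 15

local_poses = rng.normal(size=(seq_len, n_joints, 3))                  # camera-space poses
camera_poses = np.stack([random_camera_matrix(rng) for _ in range(seq_len)])

# Express every frame of the window relative to the first camera of the window ...
relative_global = get_relative_global_pose_with_camera_matrix(local_poses, camera_poses)

# ... then lift the whole window back into world space with the first camera matrix.
global_poses = get_global_pose_from_relative_global_pose(relative_global, camera_poses[0])

# Frame 0 is simply the local pose pushed through its own camera-to-world matrix.
expected_frame0 = (camera_poses[0] @ np.concatenate(
    [local_poses[0], np.ones((n_joints, 1))], axis=1).T).T[:, :3]
print(np.allclose(global_poses[0], expected_frame0))  # expected: True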
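Not part of the repository: a small synthetic check of the bone-length rescaling in utils/skeleton.py. It assumes the repository root is on PYTHONPATH and that the modules imported by utils.skeleton (open3d and the fisheye camera models) are importable; calibration_path=None selects the equisolid fisheye fallback, so no calibration file is needed just to exercise the rescaling helper.

import numpy as np
from utils.skeleton import Skeleton

skeleton = Skeleton(calibration_path=None)   # equisolid fallback, no calibration file required

rng = np.random.default_rng(0)
predicted = rng.normal(size=(15, 3))   # synthetic 15-joint pose, metres
standard = rng.normal(size=(15, 3))    # reference pose defining the target bone lengths

rescaled = skeleton.skeleton_resize_standard_skeleton(predicted.copy(), standard)

def bone_lengths(joints):
    # Distance from every joint to its kinematic parent (index 0 is the root, length 0).
    return np.linalg.norm(joints - joints[Skeleton.kinematic_parents], axis=1)

# Rescaling keeps the predicted bone directions but adopts the standard bone lengths.
print(np.allclose(bone_lengths(rescaled)[1:], bone_lengths(standard)[1:]))  # expected: True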