├── utils ├── __init__.py ├── fisheye │ ├── __init__.py │ ├── mean3D.mat │ ├── pose_fisheye_fisheye.calibration_new.json │ ├── fisheye.calibration-bak.json │ ├── fisheye.calibration.json │ ├── FishEyeEquisolid.py │ └── FishEyeCalibrated.py ├── torch_closest_rot_mat.py ├── data_utils.py ├── pytorch_gaussian.py ├── one_euro_filter.py ├── captury_studio_camera.py ├── pose_visualization_utils.py ├── pytorch_gmm.py ├── rigid_transform_with_scale.py ├── pytorch_gmm_from_scipy.py ├── utils.py └── skeleton.py ├── networks ├── __init__.py ├── dataset │ ├── __init__.py │ ├── local_dataset.py │ └── global_dataset.py ├── models │ ├── __init__.py │ ├── BaseVAE.py │ └── SeqConvVAE.py ├── train_global_mo2cap2.sh ├── train_local_mo2cap2.sh ├── train_global.sh ├── train_local.sh ├── README.md ├── config.py ├── sample.py ├── get_latent.py ├── make_dataset.py ├── train.py ├── interpolant.py ├── train_local_global.py └── train_local.py ├── MakeDataForOptimization ├── utils │ ├── __init__.py │ ├── fisheye │ │ ├── __init__.py │ │ ├── mean3D.mat │ │ ├── pose_fisheye_fisheye.calibration_new.json │ │ ├── fisheye.calibration-bak.json │ │ ├── fisheye.calibration.json │ │ ├── FishEyeEquisolid.py │ │ └── FishEyeCalibrated.py │ ├── calibrations │ │ └── new_fisheye.json │ ├── pose_visualization_utils.py │ ├── rigid_transform_with_scale.py │ ├── utils.py │ └── skeleton.py ├── bvh_reader │ ├── npybvh │ │ ├── __init__.py │ │ ├── images │ │ │ ├── keyframe.png │ │ │ └── skeleton.png │ │ ├── LICENSE │ │ ├── readme.md │ │ └── bvh.py │ └── read_egocentric_joint_position.py ├── README.md ├── slam_reader.py └── process_test_data.py ├── .gitignore ├── README.md ├── optimize_whole_sequence.py └── calculate_errors.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /networks/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /networks/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/fisheye/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | networks/logs 4 | data 5 | AMASSDataConverter 
-------------------------------------------------------------------------------- /utils/fisheye/mean3D.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianwang-mpi/GlobalEgoMocap/HEAD/utils/fisheye/mean3D.mat -------------------------------------------------------------------------------- /utils/torch_closest_rot_mat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def square_root(mat): 4 | e,v = torch.symeig(mat) 5 | 6 | def closest_rot_mat(rot_mat): 7 | pass -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/mean3D.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianwang-mpi/GlobalEgoMocap/HEAD/MakeDataForOptimization/utils/fisheye/mean3D.mat -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/images/keyframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianwang-mpi/GlobalEgoMocap/HEAD/MakeDataForOptimization/bvh_reader/npybvh/images/keyframe.png -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/images/skeleton.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianwang-mpi/GlobalEgoMocap/HEAD/MakeDataForOptimization/bvh_reader/npybvh/images/skeleton.png -------------------------------------------------------------------------------- /utils/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def mpjpe_on_different_joints(pose_preds, pose_gt): 4 | distance = np.linalg.norm(pose_gt - pose_preds, axis=-1) 5 | distance_on_each_joints = np.mean(distance, axis=tuple(range(distance.ndim - 1))) 6 | return distance_on_each_joints -------------------------------------------------------------------------------- /networks/train_global_mo2cap2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train.py --log_dir cnn_global_mo2cap2_dataset --train_data_path /home/jianwang/ScanNet/static00/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --new_dataset False --with_mo2cap2_data True --fps 25 --network cnn -------------------------------------------------------------------------------- /networks/train_local_mo2cap2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train_local.py --log_dir cnn_local_mo2cap2_dataset --train_data_path /home/jianwang/ScanNet/static00/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --new_dataset False --with_mo2cap2_data True --fps 25 --network cnn -------------------------------------------------------------------------------- /networks/train_global.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train.py --log_dir cnn_global_full_dataset_latent_2048_len_10_kl_0.5 --train_data_path /home/jianwang/ScanNet/static00/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --new_dataset False --with_mo2cap2_data False --fps 25 --network cnn 
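Note on the two small utilities listed earlier in this dump, `utils/torch_closest_rot_mat.py` (left as a stub in the repository) and `utils/data_utils.py`: neither is documented further in the repo, so the sketches below are illustrations only and not the authors' code. The SVD projection is one standard way to obtain the closest rotation matrix (the helper name `closest_rotation_svd` is an assumption), and the second snippet simply exercises `mpjpe_on_different_joints` as defined above with assumed array shapes.

```python
import torch

def closest_rotation_svd(mat: torch.Tensor) -> torch.Tensor:
    # Project a 3x3 matrix onto SO(3): with mat = U diag(S) V^T, the nearest
    # rotation in Frobenius norm is U V^T, flipping the sign of the last
    # column if the result would otherwise be a reflection (det < 0).
    u, _, v = torch.svd(mat)
    d = torch.det(u @ v.t())
    correction = torch.diag(torch.tensor([1.0, 1.0, d.sign().item()], dtype=mat.dtype))
    return u @ correction @ v.t()
```

```python
import numpy as np
from utils.data_utils import mpjpe_on_different_joints

# Assumed shapes: (n_frames, n_joints, 3); the repository uses 15 joints.
pose_gt = np.random.randn(100, 15, 3)
pose_preds = pose_gt + 0.01 * np.random.randn(100, 15, 3)

per_joint_error = mpjpe_on_different_joints(pose_preds, pose_gt)  # shape (15,)
print(per_joint_error.mean())  # overall MPJPE across joints and frames
```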
-------------------------------------------------------------------------------- /networks/train_local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train_local.py --log_dir mlp_local_full_dataset_latent_2048_len_10_kl_0.5 --train_data_path /home/jianwang/ScanNet/static00/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --with_mo2cap2_data False --new_dataset False --fps 25 --network mlp -------------------------------------------------------------------------------- /MakeDataForOptimization/README.md: -------------------------------------------------------------------------------- 1 | ## Get Data for Optimization 2 | 3 | 1. run Mo2Cap2 or other pose estimation network to get heatmap 4 | and joint depth files 5 | 6 | 2. get gt pose from BVH file and save it in to the pickle file 7 | 8 | see directory ```bvh_reader``` 9 | 10 | 3. run OpenVSLAM on image sequence to get slam file 11 | ```frame_trajectory.txt``` 12 | 13 | 4. generate data for optimization 14 | 15 | see ```process_test_data.py``` -------------------------------------------------------------------------------- /utils/fisheye/pose_fisheye_fisheye.calibration_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "new", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 639.074101, 0], 8 | [0, 500, 511.081780, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-4.083907e+02, 0.000000e+00, 1.679882e-03, -3.677087e-06, 7.461604e-09 ], 14 | "polynomialW2C": [492.969845, 193.289959, -28.612327, 51.744505, -2.120082, 13.644155, 1.512262, 15 | -18.789714, 18.962317, 14.989157, -12.692345, -5.804379, 3.508978, 1.511979 ], 16 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 17 | "extrinsic": [ 18 | [1, 0, 0, 0], 19 | [0, 1, 0, 0], 20 | [0, 0, 1, 0], 21 | [0, 0, 0, 1] 22 | ], 23 | "radial":0 24 | } 25 | 26 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/calibrations/new_fisheye.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "new", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 639.074101, 0], 8 | [0, 500, 511.081780, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-4.083907e+02, 0.000000e+00, 1.679882e-03, -3.677087e-06, 7.461604e-09 ], 14 | "polynomialW2C": [492.969845, 193.289959, -28.612327, 51.744505, -2.120082, 13.644155, 1.512262, 15 | -18.789714, 18.962317, 14.989157, -12.692345, -5.804379, 3.508978, 1.511979 ], 16 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 17 | "extrinsic": [ 18 | [1, 0, 0, 0], 19 | [0, 1, 0, 0], 20 | [0, 0, 1, 0], 21 | [0, 0, 0, 1] 22 | ], 23 | "radial":0 24 | } 25 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/pose_fisheye_fisheye.calibration_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "new", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 639.074101, 0], 8 | [0, 500, 511.081780, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | 
"imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-4.083907e+02, 0.000000e+00, 1.679882e-03, -3.677087e-06, 7.461604e-09 ], 14 | "polynomialW2C": [492.969845, 193.289959, -28.612327, 51.744505, -2.120082, 13.644155, 1.512262, 15 | -18.789714, 18.962317, 14.989157, -12.692345, -5.804379, 3.508978, 1.511979 ], 16 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 17 | "extrinsic": [ 18 | [1, 0, 0, 0], 19 | [0, 1, 0, 0], 20 | [0, 0, 1, 0], 21 | [0, 0, 0, 1] 22 | ], 23 | "radial":0 24 | } 25 | 26 | -------------------------------------------------------------------------------- /networks/models/BaseVAE.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from abc import abstractmethod 3 | import torch 4 | 5 | 6 | class BaseVAE(nn.Module): 7 | 8 | def __init__(self) -> None: 9 | super(BaseVAE, self).__init__() 10 | 11 | def encode(self, input: torch.Tensor): 12 | raise NotImplementedError 13 | 14 | def decode(self, input: torch.Tensor): 15 | raise NotImplementedError 16 | 17 | def sample(self, batch_size, current_device, **kwargs) -> torch.Tensor: 18 | raise RuntimeWarning() 19 | 20 | def generate(self, x: torch.Tensor, **kwargs) -> torch.Tensor: 21 | raise NotImplementedError 22 | 23 | @abstractmethod 24 | def forward(self, *inputs: torch.Tensor) -> torch.Tensor: 25 | pass 26 | 27 | @abstractmethod 28 | def loss_function(self, *inputs, **kwargs) -> torch.Tensor: 29 | pass 30 | -------------------------------------------------------------------------------- /utils/pytorch_gaussian.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from scipy.ndimage import gaussian_filter1d 3 | import numpy as np 4 | import torch.nn.functional as F 5 | 6 | 7 | 8 | def gaussian(window_size, sigma): 9 | def gauss_fcn(x): 10 | return -(x - window_size // 2)**2 / float(2 * sigma**2) 11 | gauss = torch.stack( 12 | [torch.exp(torch.tensor(gauss_fcn(x))) for x in range(window_size)]) 13 | return gauss / gauss.sum() 14 | 15 | if __name__ == '__main__': 16 | pose_random = np.random.randn() 17 | smoothed_skeleton_seq_np = gaussian_filter1d(pose_random, sigma=1) 18 | print(smoothed_skeleton_seq_np) 19 | 20 | 21 | pose_random_torch = torch.from_numpy(pose_random).unsqueeze_(0).unsqueeze_(0).float() 22 | kernel = gaussian(window_size=5, sigma=1) 23 | kernel = kernel.unsqueeze_(0).unsqueeze_(0).float() 24 | 25 | smoothed_skeleton_seq_torch = F.conv1d(pose_random_torch, kernel) 26 | print(smoothed_skeleton_seq_torch) -------------------------------------------------------------------------------- /utils/fisheye/fisheye.calibration-bak.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "egosyn", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 6.398405647786816e+02, 0], 8 | [0, 500, 512.11, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-241.024, 0.000000000000000E+00, 0.001746, -0.000004, 14 | 6.47811e-09], 15 | "polynomialW2C": [4.785893205484341E+02, 3.503715828980770E+02, 7.900065565120241E+01, 6.228794005673283E+01, 16 | 3.264466851189552E+01, 1.568380500967838E+01, 7.766879336977007E+00, 2.190791369989537E+00, 17 | -1.084229689289942E-01, -1.903842667463734E-01, -2.776267870029922E-02], 18 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 19 | 
"extrinsic": [ 20 | [1, 0, 0, 0], 21 | [0, 1, 0, 0], 22 | [0, 0, 1, 0], 23 | [0, 0, 0, 1] 24 | ], 25 | "radial":0 26 | } 27 | 28 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/fisheye.calibration-bak.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "egosyn", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 6.398405647786816e+02, 0], 8 | [0, 500, 512.11, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-241.024, 0.000000000000000E+00, 0.001746, -0.000004, 14 | 6.47811e-09], 15 | "polynomialW2C": [4.785893205484341E+02, 3.503715828980770E+02, 7.900065565120241E+01, 6.228794005673283E+01, 16 | 3.264466851189552E+01, 1.568380500967838E+01, 7.766879336977007E+00, 2.190791369989537E+00, 17 | -1.084229689289942E-01, -1.903842667463734E-01, -2.776267870029922E-02], 18 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 19 | "extrinsic": [ 20 | [1, 0, 0, 0], 21 | [0, 1, 0, 0], 22 | [0, 0, 1, 0], 23 | [0, 0, 0, 1] 24 | ], 25 | "radial":0 26 | } 27 | 28 | -------------------------------------------------------------------------------- /networks/README.md: -------------------------------------------------------------------------------- 1 | ## Motion VAE 2 | 3 | ### Download pre-processed dataset 4 | 5 | 1. Download the [pre-processed dataset](https://nextcloud.mpi-klsb.mpg.de/index.php/s/HWHGwZLi8xnLsRF). 6 | 2. Unzip the zip file: ```unzip EgocentricAMASS.zip``` 7 | 8 | ### train the motion VAE 9 | 10 | 1. global motion VAE 11 | 12 | ``` 13 | python train.py --log_dir cnn_global_full_dataset_latent_2048_len_10_kl_0.5 --train_data_path /path/to/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --new_dataset False --with_mo2cap2_data False --fps 25 --network cnn 14 | ``` 15 | 16 | 2. 
local motion VAE 17 | 18 | ``` 19 | python train_local.py --log_dir mlp_local_full_dataset_latent_2048_len_10_kl_0.5 --train_data_path /path/to/EgocentricAMASS --latent_dim 2048 --kl_weight 0.5 --seq_length 10 --batch_size 64 --with_mo2cap2_data False --new_dataset False --fps 25 --network mlp 20 | ``` 21 | 22 | ### sample the motion from latent space 23 | 24 | see ```sample.py``` 25 | 26 | ### interpolant two different motions 27 | 28 | see ```interpolant.py``` 29 | -------------------------------------------------------------------------------- /utils/fisheye/fisheye.calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "egosyn", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 6.597087109684564E+02, 0], 8 | [0, 500, 5.300556618148025E+02, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-2.924126419694919E+02, 0.000000000000000E+00, 1.075613595858202E-03, 2.072664555244253E-07, 14 | 4.493499097653669E-10, -1.192028310212584E-15, -1.822337421183959E-17], 15 | "polynomialW2C": [4.785893205484341E+02, 3.503715828980770E+02, 7.900065565120241E+01, 6.228794005673283E+01, 16 | 3.264466851189552E+01, 1.568380500967838E+01, 7.766879336977007E+00, 2.190791369989537E+00, 17 | -1.084229689289942E-01, -1.903842667463734E-01, -2.776267870029922E-02], 18 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 19 | "extrinsic": [ 20 | [1, 0, 0, 0], 21 | [0, 1, 0, 0], 22 | [0, 0, 1, 0], 23 | [0, 0, 0, 1] 24 | ], 25 | "radial":0 26 | } 27 | 28 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/fisheye.calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "egosyn", 3 | "sensor": [1, 1], 4 | "size": [1280, 1024], 5 | "animated": 0, 6 | "intrinsic": [ 7 | [500, 0, 6.597087109684564E+02, 0], 8 | [0, 500, 5.300556618148025E+02, 0], 9 | [0, 0, 1, 0], 10 | [0, 0, 0, 1] 11 | ], 12 | "imageCircleRadius": 5.120000000000000E+02, 13 | "polynomialC2W": [-2.924126419694919E+02, 0.000000000000000E+00, 1.075613595858202E-03, 2.072664555244253E-07, 14 | 4.493499097653669E-10, -1.192028310212584E-15, -1.822337421183959E-17], 15 | "polynomialW2C": [4.785893205484341E+02, 3.503715828980770E+02, 7.900065565120241E+01, 6.228794005673283E+01, 16 | 3.264466851189552E+01, 1.568380500967838E+01, 7.766879336977007E+00, 2.190791369989537E+00, 17 | -1.084229689289942E-01, -1.903842667463734E-01, -2.776267870029922E-02], 18 | "affine": [9.997488697329212E-01, -2.240239372797548E-04, 3.318272123957599E-04], 19 | "extrinsic": [ 20 | [1, 0, 0, 0], 21 | [0, 1, 0, 0], 22 | [0, 0, 1, 0], 23 | [0, 0, 0, 1] 24 | ], 25 | "radial":0 26 | } 27 | 28 | -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Mathias Parger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the 
Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/readme.md: -------------------------------------------------------------------------------- 1 | # npybvh 2 | Python 3 module for reading Biovision Hierarchy (BVH) files and convert keyframes to poses. 3 | 4 | # Features 5 | - Read Biovision Hierarchy files 6 | - Extract skeleton information (joint hierarchy) 7 | - Calculate full pose for all keyframes in numpy format 8 | - Display skeleton 9 | - Display pose at any keyframe 10 | - Display full animation 11 | 12 | ![Skeleton](images/skeleton.png) 13 | ![Pose animation](images/keyframe.png) 14 | 15 | 16 | # How to use 17 | ```python 18 | anim = Bvh() 19 | anim.load('example.bvh') 20 | # get position at frame 11 21 | positions, rotations = anim.frame_pos(11) 22 | 23 | # get name of joints in order of appearance in array 24 | joint_names = anim.joint_names() 25 | 26 | # plot frame 11 27 | anim.plot_frame(11) 28 | ``` 29 | 30 | # Acknowledgements 31 | Thanks to Carnegie Mellon University for their [free motion capture database](http://mocap.cs.cmu.edu/) which is also available in [bvh format](https://sites.google.com/a/cgspeed.com/cgspeed/motion-capture/cmu-bvh-conversion). 32 | 33 | # Contribute 34 | If you find bugs or miss features, please contribute! (or use issues tab :-)) 35 | -------------------------------------------------------------------------------- /utils/one_euro_filter.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def smoothing_factor(t_e, cutoff): 5 | r = 2 * math.pi * cutoff * t_e 6 | return r / (r + 1) 7 | 8 | 9 | def exponential_smoothing(a, x, x_prev): 10 | return a * x + (1 - a) * x_prev 11 | 12 | 13 | class OneEuroFilter: 14 | def __init__(self, t0, x0, dx0=0.0, min_cutoff=1.0, beta=0.0, 15 | d_cutoff=1.0): 16 | """Initialize the one euro filter.""" 17 | # The parameters. 18 | self.min_cutoff = float(min_cutoff) 19 | self.beta = float(beta) 20 | self.d_cutoff = float(d_cutoff) 21 | # Previous values. 22 | self.x_prev = float(x0) 23 | self.dx_prev = float(dx0) 24 | self.t_prev = float(t0) 25 | 26 | def __call__(self, t, x): 27 | """Compute the filtered signal.""" 28 | t_e = t - self.t_prev 29 | 30 | # The filtered derivative of the signal. 31 | a_d = smoothing_factor(t_e, self.d_cutoff) 32 | dx = (x - self.x_prev) / t_e 33 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev) 34 | 35 | # The filtered signal. 36 | cutoff = self.min_cutoff + self.beta * abs(dx_hat) 37 | a = smoothing_factor(t_e, cutoff) 38 | x_hat = exponential_smoothing(a, x, self.x_prev) 39 | 40 | # Memorize the previous values. 
41 | self.x_prev = x_hat 42 | self.dx_prev = dx_hat 43 | self.t_prev = t 44 | 45 | return x_hat -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/read_egocentric_joint_position.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | from utils.skeleton import Skeleton 3 | from npybvh.bvh import Bvh 4 | import numpy as np 5 | from tqdm import tqdm 6 | import pickle 7 | 8 | skeleton_model = Skeleton(None) 9 | anim = Bvh() 10 | egocentric_joints = [6, 15, 16, 17, 10, 11, 12, 23, 24, 25, 26, 19, 20, 21, 22] 11 | 12 | 13 | def parse_file(bvh_file_path, output_file_path, start_frame, input_frame_rate, output_frame_rate): 14 | anim.parse_file(bvh_file_path) 15 | gt_pose_seq = [] 16 | print(anim.frames) 17 | print(anim.joint_names()) 18 | step = round(input_frame_rate / output_frame_rate) 19 | for frame in tqdm(range(start_frame, anim.frames, step)): 20 | positions, rotations = anim.frame_pose(frame) 21 | 22 | positions = positions[egocentric_joints] 23 | positions = positions / 1000 24 | gt_pose_seq.append(positions) 25 | 26 | skeleton = skeleton_model.joints_2_mesh(positions) 27 | 28 | open3d.visualization.draw_geometries([skeleton]) 29 | gt_pose_seq = np.asarray(gt_pose_seq) 30 | # skeleton_list = skeleton_model.joint_list_2_mesh_list(gt_pose_seq) 31 | # open3d.visualization.draw_geometries(skeleton_list) 32 | with open(output_file_path, 'wb') as f: 33 | pickle.dump(gt_pose_seq, f) 34 | 35 | 36 | if __name__ == '__main__': 37 | parse_file(r'\\winfs-inf\HPS\Mo2Cap2Plus1\static00\CapturyData\GoProResults\captury\unknown.bvh', 38 | 'data/wild/wild.pkl', start_frame=0, input_frame_rate=50, output_frame_rate=25) -------------------------------------------------------------------------------- /utils/captury_studio_camera.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class CapturyCamera: 5 | def __init__(self, camera_path, camera_number): 6 | camera_data = self.load_camera_model(camera_path, camera_number) 7 | self.intrinsic = camera_data['intrinsic'] 8 | self.extrinsic = camera_data['extrinsic'] 9 | self.distortion = camera_data['distortion'] 10 | 11 | def get_camera_model(self): 12 | return self.intrinsic, self.extrinsic, self.distortion 13 | 14 | def load_camera_model(self, camera_path, camera_number): 15 | with open(camera_path) as f: 16 | lines = f.readlines() 17 | start_line_number = -1 18 | for i, line in enumerate(lines): 19 | if 'camera {}'.format(camera_number) in line: 20 | start_line_number = i 21 | break 22 | if start_line_number == -1: 23 | print('camera not found') 24 | camera_lines = lines[start_line_number: start_line_number + 27] 25 | distortion_line = camera_lines[11] 26 | extrinsic_lines = camera_lines[73 - 56: 76 - 56] 27 | intrinsic_lines = camera_lines[77 - 56: 80 - 56] 28 | distortions = distortion_line.split()[1:] 29 | distortions = np.asarray(distortions).astype(np.float) 30 | # print(distortions) 31 | extrinsic_lines = [line.split()[1:] for line in extrinsic_lines] 32 | intrinsic_lines = [line.split()[1:] for line in intrinsic_lines] 33 | 34 | extrinsic = np.asarray(extrinsic_lines).astype(np.float) 35 | intrinsic = np.asarray(intrinsic_lines).astype(np.float) 36 | 37 | return {'intrinsic': intrinsic, 38 | 'extrinsic': extrinsic, 39 | 'distortion': distortions} 40 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Egocentric Pose Optimizer 2 | 3 | The official implementation of the paper: 4 | 5 | ### Estimating Egocentric 3D Human Pose in Global Space. 6 | 7 | [[project page]](https://vcai.mpi-inf.mpg.de/projects/globalegomocap/) 8 | 9 | If you find this repository useful, please cite: 10 | 11 | ``` 12 | @InProceedings{Wang_2021_ICCV, 13 | author = {Wang, Jian and Liu, Lingjie and Xu, Weipeng and Sarkar, Kripasindhu and Theobalt, Christian}, 14 | title = {Estimating Egocentric 3D Human Pose in Global Space}, 15 | booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, 16 | month = {October}, 17 | year = {2021}, 18 | pages = {11500-11509} 19 | } 20 | ``` 21 | 22 | ### Optimize the motion sequences in the proposed dataset 23 | 24 | 1. Install PyTorch 1.4+ with CUDA support. 25 | 2. Run ```mkdir networks/logs``` and download the trained VAE model into the directory ```networks/logs``` from [here](https://nextcloud.mpi-klsb.mpg.de/index.php/s/ibBB7TbEsWQrMJa). 26 | 3. Run ```mkdir data``` and download the processed test sequences into the directory ```data``` from [here](https://nextcloud.mpi-klsb.mpg.de/index.php/s/kLNeAdbJzmSYKsZ). 27 | 4. Run the test on the sequences: 28 | ``` 29 | python optimize_whole_sequence.py --data_path data/jian3 30 | python optimize_whole_sequence.py --data_path data/studio-jian1 31 | python optimize_whole_sequence.py --data_path data/studio-jian2 32 | python optimize_whole_sequence.py --data_path data/studio-lingjie1 33 | python optimize_whole_sequence.py --data_path data/studio-lingjie2 34 | ``` 35 | 36 | ### Train the motion VAE 37 | 38 | If you want to train the motion VAE, see the directory ```networks```. 39 | 40 | ### Prepare the data for optimization 41 | If you want to run on your own dataset, 42 | you first need to preprocess the data following the scripts in the directory ```MakeDataForOptimization```. 43 | Basically, you need to prepare the following data: 44 | 1. Predicted human body heatmaps in egocentric view. 45 | 2. Predicted human body joint depths in egocentric view. 46 | 3. Human body ground truth (for calculating MPJPEs). 47 | 4. Camera pose sequence from the OpenVSLAM method. 48 | 49 | All of these data are combined into a single pickle file for each sequence (a hypothetical layout is sketched below).
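For orientation only, here is a purely hypothetical sketch of how such a per-sequence pickle could be assembled. The dictionary keys, file paths, and array shapes below are assumptions made for illustration; they are not the format actually expected by `optimize_whole_sequence.py` (see `MakeDataForOptimization/process_test_data.py` for the real packing).

```python
import pickle
import numpy as np

# Hypothetical per-sequence container: one entry per input listed above.
sequence_data = {
    'heatmaps': np.zeros((100, 15, 64, 64), dtype=np.float32),   # predicted 2D heatmaps
    'joint_depths': np.zeros((100, 15), dtype=np.float32),       # predicted joint depths
    'gt_pose': np.zeros((100, 15, 3), dtype=np.float32),         # ground-truth 3D joints
    'camera_poses': np.zeros((100, 4, 4), dtype=np.float32),     # SLAM camera pose matrices
}

with open('data/my_sequence/test_data.pkl', 'wb') as f:  # path is an example
    pickle.dump(sequence_data, f)
```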
50 | 51 | -------------------------------------------------------------------------------- /networks/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | parser = argparse.ArgumentParser(description='Estimate the ego-motion from single images') 4 | 5 | parser.add_argument('--train_data_path', default='/home/jianwang/ScanNet/static00/EgocentricAMASSPytorch/relative_global_pose_20_25.h5', 6 | type=str, help='train data dir') 7 | # 8 | parser.add_argument('--test_data_path', default='/home/jianwang/ScanNet/static00/EgocentricAMASSPytorch/relative_global_pose_20_25.h5', 9 | type=str, help='test data dir') 10 | 11 | parser.add_argument('--network', type=str, default='cnn', required=False, help='network type') 12 | 13 | parser.add_argument('--attention', type=str, default=None, required=False, help='attention type') 14 | 15 | parser.add_argument('--latent_dim', type=int, required=True, help='vae latent dimension') 16 | 17 | parser.add_argument('--with_mo2cap2_data', type=lambda x: (str(x).lower() == 'true'), required=True, default=False, help='use mo2cap2 dataset') 18 | 19 | parser.add_argument('--new_dataset', type=lambda x: (str(x).lower() == 'true'), required=True, default=True, help='use new dataset') 20 | 21 | parser.add_argument('--data_balance', type=lambda x: (str(x).lower() == 'true'), required=False, default=False, help='balance walking data') 22 | 23 | parser.add_argument('--slide_window_step', default=1, type=int, required=False, help='size of sample window') 24 | 25 | parser.add_argument('--seq_length', type=int, required=True, help='length of sequence') 26 | 27 | parser.add_argument('--fps', type=int, required=True, default=25, help='fps') 28 | 29 | parser.add_argument('--kl_weight', default=0.25, type=float, required=True, help='kl weight') 30 | 31 | parser.add_argument('--epoch', default=20, type=int, help='number of total epochs to run') 32 | 33 | parser.add_argument('--batch_size', default=64, type=int, help='batch size on every gpu') 34 | 35 | parser.add_argument('--num_workers', default=8, type=int, help='number of workers loading data') 36 | 37 | parser.add_argument('--learning_rate', default=1e-4, type=float, help='initial learning rate') 38 | 39 | parser.add_argument('--weight_decay', default=0, type=float, help='weight decay (default: 1e-4)') 40 | 41 | parser.add_argument('--log_dir', default=None, type=str, help='logging directory') 42 | 43 | 44 | 45 | parser.add_argument('--log_prefix', default=None, type=str, help='logging prefix') 46 | 47 | parser.add_argument('--log_step', default=100, type=int, help='logging step') 48 | 49 | parser.add_argument('--save_step', default=2000, type=int, help='saving step') 50 | 51 | args = parser.parse_args() -------------------------------------------------------------------------------- /utils/pose_visualization_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import open3d 3 | import numpy as np 4 | from scipy.spatial.transform import Rotation 5 | 6 | def get_sphere(position, radius=1.0, color=(0.1, 0.1, 0.7)): 7 | mesh_sphere: open3d.geometry.TriangleMesh = open3d.geometry.TriangleMesh.create_sphere(radius=radius) 8 | mesh_sphere.paint_uniform_color(color) 9 | 10 | # translate to position 11 | mesh_sphere = mesh_sphere.translate(position, relative=False) 12 | return mesh_sphere 13 | 14 | def rotation_matrix_from_vectors(vec1, vec2): 15 | """ Find the rotation matrix that aligns vec1 to vec2 16 | :param vec1: A 3d 
"source" vector 17 | :param vec2: A 3d "destination" vector 18 | :return mat: A transform matrix (3x3) which when applied to vec1, aligns it with vec2. 19 | """ 20 | a, b = (vec1 / np.linalg.norm(vec1)).reshape(3), (vec2 / np.linalg.norm(vec2)).reshape(3) 21 | v = np.cross(a, b) 22 | c = np.dot(a, b) 23 | s = np.linalg.norm(v) 24 | kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]]) 25 | rotation_matrix = np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s ** 2)) 26 | return rotation_matrix 27 | 28 | def get_cylinder(start_point, end_point, radius=0.3, color=(0.1, 0.9, 0.1)): 29 | center = (start_point + end_point) / 2 30 | height = np.linalg.norm(start_point - end_point) 31 | mesh_cylinder: open3d.geometry.TriangleMesh = open3d.geometry.TriangleMesh.create_cylinder(radius=radius, height=height) 32 | mesh_cylinder.paint_uniform_color(color) 33 | 34 | # translate and rotate to position 35 | # rotate vector 36 | rot_vec = end_point - start_point 37 | rot_vec = rot_vec / np.linalg.norm(rot_vec) 38 | rot_0 = np.array([0, 0, 1]) 39 | rot_mat = rotation_matrix_from_vectors(rot_0, rot_vec) 40 | # if open3d.__version__ >= '0.9.0.0': 41 | # rotation_param = rot_mat 42 | # else: 43 | # rotation_param = Rotation.from_matrix(rot_mat).as_euler('xyz') 44 | rotation_param = rot_mat 45 | mesh_cylinder = mesh_cylinder.rotate(rotation_param) 46 | mesh_cylinder = mesh_cylinder.translate(center, relative=False) 47 | return mesh_cylinder 48 | 49 | if __name__ == '__main__': 50 | point1 = np.array([-1, 11, 8]) 51 | point2 = np.array([12, -1, 5]) 52 | sphere1 = get_sphere(position=point1, radius=0.1) 53 | sphere2 = get_sphere(position=point2, radius=0.1) 54 | cylinder = get_cylinder(start_point=point1, end_point=point2, radius=0.02) 55 | 56 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5) 57 | 58 | open3d.visualization.draw_geometries( 59 | [sphere1, sphere2, cylinder, mesh_frame]) -------------------------------------------------------------------------------- /networks/sample.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import sys 3 | sys.path.append('..') 4 | import torch 5 | from models.SeqConvVAE import ConvVAE 6 | import pickle 7 | from utils.skeleton import Skeleton 8 | import os 9 | from tqdm import tqdm 10 | 11 | class Sample: 12 | def __init__(self, network_path, out_path, seq_length=20, latent_dim=2048): 13 | self.seq_length = seq_length 14 | self.out_path = out_path 15 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=latent_dim, seq_len=self.seq_length) 16 | state_dict = torch.load(network_path)['state_dict'] 17 | self.network.load_state_dict(state_dict) 18 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 19 | self.network = self.network.to(self.device) 20 | 21 | self.skeleton_model = Skeleton(None) 22 | 23 | def save_sample(self, sample, out_path): 24 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 25 | for i, skeleton in enumerate(skeleton_list): 26 | open3d.io.write_triangle_mesh(os.path.join(out_path, '{}.ply'.format(i)), skeleton) 27 | 28 | def show_example(self, sample): 29 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 30 | open3d.visualization.draw_geometries(skeleton_list) 31 | 32 | def sample(self, sample_num=10, visualization=False): 33 | sampled_result = self.network.sample(sample_num, current_device=self.device) 34 | sampled_result = sampled_result.cpu().detach_().numpy() 35 | 
sampled_result = sampled_result.reshape((sample_num, self.seq_length, 15, 3)) 36 | for i, sample in tqdm(enumerate(sampled_result)): 37 | print('sample: {}'.format(i)) 38 | if visualization is True: 39 | self.show_example(sample) 40 | else: 41 | out_path = os.path.join(self.out_path, 'sample_{}'.format(i)) 42 | if not os.path.isdir(out_path): 43 | os.mkdir(out_path) 44 | self.show_example(sample) 45 | self.save_sample(sample, out_path) 46 | 47 | if __name__ == '__main__': 48 | 49 | # sampler = Sample(network_path='logs/test/checkpoints/37.pth.tar', 50 | # out_path='out/sample') 51 | sampler = Sample(network_path='logs/only_local_full_dataset_latent_2048_len_10_kl_0.5_2/checkpoints/19.pth.tar', 52 | out_path='out/sample/slide_window_latent_2048_len_5_windows_size_5_kl_0.5', seq_length=10, 53 | latent_dim=2048) 54 | sampler.sample(sample_num=12, visualization=True) 55 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/pose_visualization_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import open3d 3 | import numpy as np 4 | from scipy.spatial.transform import Rotation 5 | 6 | def get_sphere(position, radius=1.0, color=(0.1, 0.1, 0.7)): 7 | mesh_sphere: open3d.geometry.TriangleMesh = open3d.geometry.TriangleMesh.create_sphere(radius=radius) 8 | mesh_sphere.paint_uniform_color(color) 9 | 10 | # translate to position 11 | mesh_sphere = mesh_sphere.translate(position, relative=False) 12 | return mesh_sphere 13 | 14 | def rotation_matrix_from_vectors(vec1, vec2): 15 | """ Find the rotation matrix that aligns vec1 to vec2 16 | :param vec1: A 3d "source" vector 17 | :param vec2: A 3d "destination" vector 18 | :return mat: A transform matrix (3x3) which when applied to vec1, aligns it with vec2. 
19 | """ 20 | a, b = (vec1 / np.linalg.norm(vec1)).reshape(3), (vec2 / np.linalg.norm(vec2)).reshape(3) 21 | v = np.cross(a, b) 22 | c = np.dot(a, b) 23 | s = np.linalg.norm(v) 24 | kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]]) 25 | rotation_matrix = np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s ** 2)) 26 | return rotation_matrix 27 | 28 | def get_cylinder(start_point, end_point, radius=0.3, color=(0.1, 0.9, 0.1)): 29 | center = (start_point + end_point) / 2 30 | height = np.linalg.norm(start_point - end_point) 31 | mesh_cylinder: open3d.geometry.TriangleMesh = open3d.geometry.TriangleMesh.create_cylinder(radius=radius, height=height) 32 | mesh_cylinder.paint_uniform_color(color) 33 | 34 | # translate and rotate to position 35 | # rotate vector 36 | rot_vec = end_point - start_point 37 | rot_vec = rot_vec / np.linalg.norm(rot_vec) 38 | rot_0 = np.array([0, 0, 1]) 39 | rot_mat = rotation_matrix_from_vectors(rot_0, rot_vec) 40 | # if open3d.__version__ >= '0.9.0.0': 41 | # rotation_param = rot_mat 42 | # else: 43 | # rotation_param = Rotation.from_matrix(rot_mat).as_euler('xyz') 44 | rotation_param = rot_mat 45 | mesh_cylinder = mesh_cylinder.rotate(rotation_param) 46 | mesh_cylinder = mesh_cylinder.translate(center, relative=False) 47 | return mesh_cylinder 48 | 49 | if __name__ == '__main__': 50 | point1 = np.array([-1, 11, 8]) 51 | point2 = np.array([12, -1, 5]) 52 | sphere1 = get_sphere(position=point1, radius=0.1) 53 | sphere2 = get_sphere(position=point2, radius=0.1) 54 | cylinder = get_cylinder(start_point=point1, end_point=point2, radius=0.02) 55 | 56 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5) 57 | 58 | open3d.visualization.draw_geometries( 59 | [sphere1, sphere2, cylinder, mesh_frame]) -------------------------------------------------------------------------------- /utils/fisheye/FishEyeEquisolid.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | 5 | 6 | class FishEyeCameraEquisolid: 7 | def __init__(self, focal_length, sensor_size, img_size, use_gpu=False): 8 | """ 9 | @param focal_length: focal length of camera in mm 10 | @param sensor_size: sensor size of camera in mm 11 | @param img_size: image size of w, h 12 | @param use_gpu: whether use the gpu to accelerate the calculation 13 | """ 14 | self.sensor_size = sensor_size 15 | self.img_size = np.asarray(img_size) 16 | self.use_gpu = use_gpu 17 | # calculate the focal length in pixel 18 | self.focal_length = focal_length / np.max(sensor_size) * np.max(img_size) 19 | 20 | # calculate the image center 21 | self.img_center = self.img_size / 2 + 1e-10 22 | # get max distance from image center 23 | self.max_distance = self.focal_length * np.sqrt(2) 24 | 25 | if self.use_gpu: 26 | gpu_ok = torch.cuda.is_available() 27 | if gpu_ok is False: 28 | raise Exception("GPU is not available!") 29 | 30 | def camera2world(self, point: np.ndarray, depth: np.ndarray): 31 | """ 32 | @param point: 2d point in shape: (n, 2) 33 | @param depth: depth of every points 34 | @return: 3d position of every point 35 | """ 36 | # get the distance of point to center 37 | depth = depth.astype(np.float) 38 | point_centered = point.astype(np.float) - self.img_center 39 | x = point_centered[:, 0] 40 | y = point_centered[:, 1] 41 | distance_from_center = np.sqrt(np.square(x) + np.square(y)) 42 | distance_from_center[distance_from_center > self.max_distance - 30] = self.max_distance 43 | 44 | theta = 2 * 
np.arcsin(distance_from_center / (2 * self.focal_length)) 45 | Z = distance_from_center / np.tan(theta) 46 | 47 | # square_sin_theta_div_2 = np.square(distance_from_center / (2 * self.focal_length)) 48 | # tan_theta_div_1 = np.sqrt(1 / (4 * square_sin_theta_div_2 * (1 - square_sin_theta_div_2)) - 1) 49 | # Z = distance_from_center * tan_theta_div_1 50 | point_3d = np.array([x, y, Z]) 51 | norm = np.linalg.norm(point_3d, axis=0) 52 | point_3d = point_3d / norm * depth 53 | return point_3d.transpose() 54 | 55 | def camera2world_pytorch(self, point: torch.Tensor(), depth: torch.Tensor): 56 | pass 57 | 58 | 59 | if __name__ == '__main__': 60 | camera = FishEyeCameraEquisolid(focal_length=9, sensor_size=32, img_size=(1280, 1024)) 61 | point = np.array([[660, 20], [660, 20], ]) 62 | depth = np.array([10, 10]) 63 | point3d = camera.camera2world(point, depth) 64 | print(point3d) 65 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/rigid_transform_with_scale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.linalg 3 | import random 4 | 5 | # Relevant links: 6 | # - http://stackoverflow.com/a/32244818/263061 (solution with scale) 7 | # - "Least-Squares Rigid Motion Using SVD" (no scale but easy proofs and explains how weights could be added) 8 | 9 | 10 | # Rigidly (+scale) aligns two point clouds with know point-to-point correspondences 11 | # with least-squares error. 12 | # Returns (scale factor c, rotation matrix R, translation vector t) such that 13 | # Q = P*cR + t 14 | # if they align perfectly, or such that 15 | # SUM over point i ( | P_i*cR + t - Q_i |^2 ) 16 | # is minimised if they don't align perfectly. 17 | 18 | def umeyama_ransac(P, Q, epsilon=0.4, n_iters=100): 19 | assert P.shape == Q.shape 20 | inliner_set = [] 21 | point_length = P.shape[0] 22 | for i in range(n_iters): 23 | sampled_points = random.sample(list(range(point_length)), 4) 24 | sampled_P = P[sampled_points] 25 | sampled_Q = Q[sampled_points] 26 | c, R, t = umeyama(sampled_P, sampled_Q) 27 | 28 | projected_P = P @ R * c + t 29 | new_inliner_set = [] 30 | for j in range(point_length): 31 | if np.linalg.norm(projected_P[j] - Q[j], ord=2) < epsilon: 32 | new_inliner_set.append(j) 33 | if len(new_inliner_set) > len(inliner_set): 34 | inliner_set = new_inliner_set 35 | 36 | sampled_P = P[inliner_set] 37 | sampled_Q = Q[inliner_set] 38 | c, R, t = umeyama(sampled_P, sampled_Q) 39 | return c, R, t 40 | 41 | def umeyama(P, Q): 42 | assert P.shape == Q.shape 43 | n, dim = P.shape 44 | 45 | centeredP = P - P.mean(axis=0) 46 | centeredQ = Q - Q.mean(axis=0) 47 | 48 | C = np.dot(np.transpose(centeredP), centeredQ) / n 49 | 50 | V, S, W = np.linalg.svd(C) 51 | d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 52 | 53 | if d: 54 | S[-1] = -S[-1] 55 | V[:, -1] = -V[:, -1] 56 | 57 | R = np.dot(V, W) 58 | 59 | varP = np.var(P, axis=0).sum() 60 | c = 1/varP * np.sum(S) # scale factor 61 | 62 | t = Q.mean(axis=0) - P.mean(axis=0).dot(c*R) 63 | 64 | return c, R, t 65 | 66 | def umeyama_dim_2(P, Q): 67 | assert P.shape == Q.shape 68 | n, dim1 = P.shape 69 | 70 | centeredP = P 71 | centeredQ = Q 72 | 73 | C = np.dot(np.transpose(centeredP), centeredQ) / n 74 | 75 | V, S, W = np.linalg.svd(C) 76 | d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 77 | 78 | if d: 79 | S[-1] = -S[-1] 80 | V[:, -1] = -V[:, -1] 81 | 82 | R = np.dot(V, W) 83 | 84 | varP = np.var(P, axis=0).sum() 85 | c = 1/varP * np.sum(S) # scale 
factor 86 | 87 | t = Q.mean(axis=0) - P.mean(axis=0).dot(c*R) 88 | 89 | return c, R, t -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/FishEyeEquisolid.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | 5 | 6 | class FishEyeCameraEquisolid: 7 | def __init__(self, focal_length, sensor_size, img_size, use_gpu=False): 8 | """ 9 | @param focal_length: focal length of camera in mm 10 | @param sensor_size: sensor size of camera in mm 11 | @param img_size: image size of w, h 12 | @param use_gpu: whether use the gpu to accelerate the calculation 13 | """ 14 | self.sensor_size = sensor_size 15 | self.img_size = np.asarray(img_size) 16 | self.use_gpu = use_gpu 17 | # calculate the focal length in pixel 18 | self.focal_length = focal_length / np.max(sensor_size) * np.max(img_size) 19 | 20 | # calculate the image center 21 | self.img_center = self.img_size / 2 + 1e-10 22 | # get max distance from image center 23 | self.max_distance = self.focal_length * np.sqrt(2) 24 | 25 | if self.use_gpu: 26 | gpu_ok = torch.cuda.is_available() 27 | if gpu_ok is False: 28 | raise Exception("GPU is not available!") 29 | 30 | def camera2world(self, point: np.ndarray, depth: np.ndarray): 31 | """ 32 | @param point: 2d point in shape: (n, 2) 33 | @param depth: depth of every points 34 | @return: 3d position of every point 35 | """ 36 | # get the distance of point to center 37 | depth = depth.astype(np.float) 38 | point_centered = point.astype(np.float) - self.img_center 39 | x = point_centered[:, 0] 40 | y = point_centered[:, 1] 41 | distance_from_center = np.sqrt(np.square(x) + np.square(y)) 42 | distance_from_center[distance_from_center > self.max_distance - 30] = self.max_distance 43 | 44 | theta = 2 * np.arcsin(distance_from_center / (2 * self.focal_length)) 45 | Z = distance_from_center / np.tan(theta) 46 | 47 | # square_sin_theta_div_2 = np.square(distance_from_center / (2 * self.focal_length)) 48 | # tan_theta_div_1 = np.sqrt(1 / (4 * square_sin_theta_div_2 * (1 - square_sin_theta_div_2)) - 1) 49 | # Z = distance_from_center * tan_theta_div_1 50 | point_3d = np.array([x, y, Z]) 51 | norm = np.linalg.norm(point_3d, axis=0) 52 | point_3d = point_3d / norm * depth 53 | return point_3d.transpose() 54 | 55 | def camera2world_pytorch(self, point: torch.Tensor(), depth: torch.Tensor): 56 | pass 57 | 58 | 59 | if __name__ == '__main__': 60 | camera = FishEyeCameraEquisolid(focal_length=9, sensor_size=32, img_size=(1280, 1024)) 61 | point = np.array([[660, 20], [660, 20], ]) 62 | depth = np.array([10, 10]) 63 | point3d = camera.camera2world(point, depth) 64 | print(point3d) 65 | -------------------------------------------------------------------------------- /utils/pytorch_gmm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from math import pi 5 | from scipy.special import logsumexp 6 | from sklearn.mixture import GaussianMixture 7 | import pickle 8 | 9 | 10 | class GaussianMixturePytorch: 11 | """ 12 | Fits a mixture of k=1,..,K Gaussians to the input data (K is supplied via n_components). Input tensors are expected to be flat with dimensions (n: number of samples, d: number of features). 13 | The model then extends them to (n, 1, d). 
14 | The model parametrization (mu, sigma) is stored as (1, k, d), and probabilities are shaped (n, k, 1) if they relate to an individual sample, or (1, k, 1) if they assign membership probabilities to one of the mixture components. 15 | """ 16 | 17 | def __init__(self, scipy_model_path): 18 | 19 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 20 | 21 | self.mu, self.precisions_cholesky, self.covariances = \ 22 | self.load_from_scipy_model(scipy_model_path) 23 | 24 | def load_from_scipy_model(self, scipy_model_path): 25 | with open(scipy_model_path, 'rb') as f: 26 | gmm: GaussianMixture = pickle.load(f) 27 | means = torch.from_numpy(gmm.means_).float().to(self.device) 28 | precisions_cholesky = torch.from_numpy(gmm.precisions_cholesky_).float().to(self.device) 29 | covariances = torch.from_numpy(gmm.covariances_).float().to(self.device) 30 | return means, precisions_cholesky, covariances 31 | 32 | def _estimate_log_prob(self, X): 33 | return self._estimate_log_gaussian_prob( 34 | X, self.mu, self.precisions_cholesky) 35 | 36 | def score_samples(self, X): 37 | X = X.to(self.device) 38 | return torch.logsumexp(self._estimate_log_prob(X), dim=1) 39 | 40 | def _compute_log_det_cholesky(self, matrix_chol, n_features): 41 | """ 42 | Compute the log-det of the cholesky decomposition of matrices. 43 | """ 44 | 45 | n_components, _, _ = matrix_chol.shape 46 | log_det_chol = (torch.sum(torch.log(matrix_chol.view(n_components, -1)[:, ::n_features + 1]), 1)) 47 | 48 | return log_det_chol 49 | 50 | def _estimate_log_gaussian_prob(self, X, means, precisions_chol): 51 | n_samples, n_features = X.shape 52 | n_components, _ = means.shape 53 | # det(precision_chol) is half of det(precision) 54 | log_det = self._compute_log_det_cholesky( 55 | precisions_chol, n_features) 56 | 57 | log_prob = torch.empty((n_samples, n_components)).to(self.device) 58 | for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): 59 | y = torch.matmul(X, prec_chol) - torch.matmul(mu, prec_chol) 60 | log_prob[:, k] = torch.sum(torch.square(y), axis=1) 61 | 62 | return -.5 * (n_features * torch.log(torch.Tensor([2 * np.pi])).to(self.device) + log_prob) + log_det 63 | 64 | 65 | if __name__ == '__main__': 66 | gmm = GaussianMixturePytorch('../AMASSDataConverter/model_global_20_5.pkl') 67 | X = torch.randn(5, 225) 68 | res = gmm.score_samples(X) 69 | print(res) 70 | 71 | with open('../AMASSDataConverter/model_global_20_5.pkl', 'rb') as f: 72 | gmm_scipy: GaussianMixture = pickle.load(f) 73 | X = X.cpu().numpy() 74 | print(gmm_scipy.score_samples(X)) 75 | -------------------------------------------------------------------------------- /networks/get_latent.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import sys 3 | sys.path.append('..') 4 | import torch 5 | from SeqConvVAE import ConvVAE 6 | import pickle 7 | from utils.skeleton import Skeleton 8 | import os 9 | from tqdm import tqdm 10 | from dataset import AMASSDataset 11 | from dataset import Mo2Cap2Dataset 12 | from torch.utils.data import DataLoader 13 | import numpy as np 14 | 15 | class Sample: 16 | def __init__(self, network_path, out_path, seq_length=20, latent_dim=128): 17 | self.seq_length = seq_length 18 | self.out_path = out_path 19 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=latent_dim, seq_len=self.seq_length) 20 | state_dict = torch.load(network_path)['state_dict'] 21 | self.network.load_state_dict(state_dict) 22 | self.device = 
torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 23 | self.network = self.network.to(self.device) 24 | 25 | self.skeleton_model = Skeleton(None) 26 | 27 | # self.test_dataset = AMASSDataset(data_path='/home/wangjian/Develop/BodyPoseOptimization/AMASSDataConverter/pkls', 28 | # frame_num=self.seq_length, 29 | # is_train=False) 30 | 31 | self.test_dataset = Mo2Cap2Dataset( 32 | pkl_path='../test_data_weipeng_studio/data_start_1187_end_1287/test_data.pkl') 33 | 34 | self.test_dataloader = DataLoader(self.test_dataset, batch_size=4, shuffle=False, 35 | drop_last=False, num_workers=2) 36 | 37 | def save_sample(self, sample, out_path): 38 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 39 | for i, skeleton in enumerate(skeleton_list): 40 | open3d.io.write_triangle_mesh(os.path.join(out_path, '{}.ply'.format(i)), skeleton) 41 | 42 | def show_example(self, sample): 43 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 44 | open3d.visualization.draw_geometries(skeleton_list) 45 | 46 | def test(self, visualization=False): 47 | print('---------------------Start Eval-----------------------') 48 | self.network.eval() 49 | with torch.no_grad(): 50 | for i, relative_global_pose in tqdm(enumerate(self.test_dataloader)): 51 | relative_global_pose = relative_global_pose.to(self.device) 52 | relative_global_pose = relative_global_pose 53 | mu, std, z = self.network.get_latent_space(relative_global_pose) 54 | # print(mu) 55 | # print(std) 56 | # 57 | print('mu error is: {}'.format(torch.sum(torch.square(mu)))) 58 | print('std error is: {}'.format(torch.sum(torch.square(std - 1)))) 59 | print('---------------------------------------') 60 | 61 | 62 | pose_preds, pose_inputs, _, _ = self.network(relative_global_pose) 63 | mu, std, z = self.network.get_latent_space(pose_preds) 64 | print('vae refined mu error is: {}'.format(torch.sum(torch.square(mu)))) 65 | print('vae refined std error is: {}'.format(torch.sum(torch.square(std - 1)))) 66 | print('***********************************************') 67 | 68 | 69 | 70 | if __name__ == '__main__': 71 | 72 | # sampler = Sample(network_path='logs/test/checkpoints/37.pth.tar', 73 | # out_path='out/sample') 74 | sampler = Sample(network_path='logs/real_full_dataset_latent_2048_len_5_slide_window_step_1_kl_0.5/checkpoints/19.pth.tar', 75 | out_path='out/test', seq_length=5, latent_dim=2048) 76 | sampler.test(visualization=True) 77 | -------------------------------------------------------------------------------- /utils/rigid_transform_with_scale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.linalg 3 | import random 4 | import torch 5 | 6 | # Relevant links: 7 | # - http://stackoverflow.com/a/32244818/263061 (solution with scale) 8 | # - "Least-Squares Rigid Motion Using SVD" (no scale but easy proofs and explains how weights could be added) 9 | 10 | 11 | # Rigidly (+scale) aligns two point clouds with know point-to-point correspondences 12 | # with least-squares error. 13 | # Returns (scale factor c, rotation matrix R, translation vector t) such that 14 | # Q = P*cR + t 15 | # if they align perfectly, or such that 16 | # SUM over point i ( | P_i*cR + t - Q_i |^2 ) 17 | # is minimised if they don't align perfectly. 
18 | def umeyama(P, Q): 19 | assert P.shape == Q.shape 20 | n, dim = P.shape 21 | 22 | centeredP = P - P.mean(axis=0) 23 | centeredQ = Q - Q.mean(axis=0) 24 | 25 | C = np.dot(np.transpose(centeredP), centeredQ) / n 26 | 27 | 28 | 29 | V, S, W = np.linalg.svd(C) 30 | d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 31 | 32 | if d: 33 | S[-1] = -S[-1] 34 | V[:, -1] = -V[:, -1] 35 | 36 | R = np.dot(V, W) 37 | 38 | varP = np.var(P, axis=0).sum() 39 | c = 1/varP * np.sum(S) # scale factor 40 | 41 | t = Q.mean(axis=0) - P.mean(axis=0).dot(c*R) 42 | 43 | return c, R, t 44 | 45 | def umeyama_pytorch(P, Q): 46 | assert P.shape == Q.shape 47 | n, dim = P.shape 48 | 49 | centeredP = P - torch.mean(P, dim=0) 50 | centeredQ = Q - torch.mean(Q, dim=0) 51 | 52 | C = centeredP.T @ centeredQ / n 53 | 54 | V, S, W = torch.svd(C) 55 | W = W.T 56 | d = (torch.det(V) * torch.det(W)) < 0.0 57 | 58 | if d: 59 | S[-1] = -S[-1] 60 | V[:, -1] = -V[:, -1] 61 | 62 | R = V @ W 63 | 64 | 65 | varP = torch.sum(torch.var(P, dim=0, unbiased=False)) 66 | c = 1 / varP * torch.sum(S) # scale factor 67 | 68 | t = torch.mean(Q, dim=0) - torch.mean(P, dim=0).matmul(c * R) 69 | 70 | return c, R, t 71 | 72 | def umeyama_ransac(P, Q, epsilon=0.2, n_iters=80): 73 | assert P.shape == Q.shape 74 | inliner_set = [] 75 | point_length = P.shape[0] 76 | for i in range(n_iters): 77 | sampled_points = random.sample(list(range(point_length)), 4) 78 | sampled_P = P[sampled_points] 79 | sampled_Q = Q[sampled_points] 80 | c, R, t = umeyama(sampled_P, sampled_Q) 81 | 82 | projected_P = P @ R * c + t 83 | new_inliner_set = [] 84 | for j in range(point_length): 85 | if np.linalg.norm(projected_P[j] - Q[j], ord=2) < epsilon: 86 | new_inliner_set.append(j) 87 | if len(new_inliner_set) > len(inliner_set): 88 | inliner_set = new_inliner_set 89 | 90 | sampled_P = P[inliner_set] 91 | sampled_Q = Q[inliner_set] 92 | c, R, t = umeyama(sampled_P, sampled_Q) 93 | return c, R, t 94 | 95 | def umeyama_dim_2(P, Q): 96 | assert P.shape == Q.shape 97 | n, dim1 = P.shape 98 | 99 | centeredP = P 100 | centeredQ = Q 101 | 102 | C = np.dot(np.transpose(centeredP), centeredQ) / n 103 | 104 | V, S, W = np.linalg.svd(C) 105 | d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 106 | 107 | if d: 108 | S[-1] = -S[-1] 109 | V[:, -1] = -V[:, -1] 110 | 111 | R = np.dot(V, W) 112 | 113 | varP = np.var(P, axis=0).sum() 114 | c = 1/varP * np.sum(S) # scale factor 115 | 116 | t = Q.mean(axis=0) - P.mean(axis=0).dot(c*R) 117 | 118 | return c, R, t 119 | 120 | if __name__ == '__main__': 121 | a = np.random.normal(size=(15, 3)) 122 | b = np.random.normal(size=(15, 3)) 123 | 124 | result1 = umeyama(a.copy(), b.copy()) 125 | print(result1) 126 | result2 = umeyama_pytorch(torch.from_numpy(a), torch.from_numpy(b)) 127 | print(result2) -------------------------------------------------------------------------------- /utils/pytorch_gmm_from_scipy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from math import pi 5 | from scipy.special import logsumexp 6 | from sklearn.mixture import GaussianMixture 7 | import pickle 8 | 9 | 10 | class GaussianMixturePytorchFromScipy: 11 | """ 12 | Fits a mixture of k=1,..,K Gaussians to the input data (K is supplied via n_components). Input tensors are expected to be flat with dimensions (n: number of samples, d: number of features). 13 | The model then extends them to (n, 1, d). 
14 | The model parametrization (mu, sigma) is stored as (1, k, d), and probabilities are shaped (n, k, 1) if they relate to an individual sample, or (1, k, 1) if they assign membership probabilities to one of the mixture components. 15 | """ 16 | 17 | def __init__(self, scipy_model_path): 18 | 19 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 20 | 21 | self.mu, self.precisions_cholesky, self.covariances, self.covariance_type = \ 22 | self.load_from_scipy_model(scipy_model_path) 23 | 24 | 25 | def load_from_scipy_model(self, scipy_model_path): 26 | with open(scipy_model_path, 'rb') as f: 27 | gmm: GaussianMixture = pickle.load(f) 28 | means = torch.from_numpy(gmm.means_).float().to(self.device) 29 | precisions_cholesky = torch.from_numpy(gmm.precisions_cholesky_).float().to(self.device) 30 | covariances = torch.from_numpy(gmm.covariances_).float().to(self.device) 31 | cov_type = gmm.covariance_type 32 | return means, precisions_cholesky, covariances, cov_type 33 | 34 | def _estimate_log_prob(self, X): 35 | return self._estimate_log_gaussian_prob( 36 | X, self.mu, self.precisions_cholesky) 37 | 38 | def score_samples(self, X): 39 | X = X.to(self.device) 40 | return torch.logsumexp(self._estimate_log_prob(X), dim=1) 41 | 42 | def _compute_log_det_cholesky(self, matrix_chol, n_features): 43 | """ 44 | Compute the log-det of the cholesky decomposition of matrices. 45 | """ 46 | if self.covariance_type == 'full': 47 | n_components, _, _ = matrix_chol.shape 48 | log_det_chol = (torch.sum(torch.log(matrix_chol.view(n_components, -1)[:, ::n_features + 1]), 1)) 49 | elif self.covariance_type == 'diag': 50 | log_det_chol = (torch.sum(torch.log(matrix_chol), axis=1)) 51 | 52 | return log_det_chol 53 | 54 | def _estimate_log_gaussian_prob(self, X, means, precisions_chol): 55 | n_samples, n_features = X.shape 56 | n_components, _ = means.shape 57 | # det(precision_chol) is half of det(precision) 58 | log_det = self._compute_log_det_cholesky( 59 | precisions_chol, n_features) 60 | 61 | if self.covariance_type == 'full': 62 | log_prob = torch.empty((n_samples, n_components)).to(self.device) 63 | for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)): 64 | y = torch.matmul(X, prec_chol) - torch.matmul(mu, prec_chol) 65 | log_prob[:, k] = torch.sum(torch.square(y), axis=1) 66 | elif self.covariance_type == 'diag': 67 | precisions = precisions_chol ** 2 68 | log_prob = (torch.sum((means ** 2 * precisions), 1) - 69 | 2. 
* torch.matmul(X, (means * precisions).T) + 70 | torch.matmul(X ** 2, precisions.T)) 71 | else: 72 | raise Exception('wrong covariance type!') 73 | 74 | return -.5 * (n_features * torch.log(torch.Tensor([2 * np.pi])).to(self.device) + log_prob) + log_det 75 | 76 | 77 | if __name__ == '__main__': 78 | gmm = GaussianMixturePytorchFromScipy('../AMASSDataConverter/model_global_ncomp_1_nframe_5_slidewindow_True_covtype_diag.pkl') 79 | X_pose = torch.randn(5, 5, 15, 3) 80 | X = X_pose.view(-1, 225) 81 | res = gmm.score_samples(X) 82 | conv = gmm.covariances.detach().cpu().numpy() 83 | print(res) 84 | 85 | with open('../AMASSDataConverter/model_global_ncomp_1_nframe_5_slidewindow_True_covtype_diag.pkl', 'rb') as f: 86 | gmm_scipy: GaussianMixture = pickle.load(f) 87 | X = X.cpu().numpy() 88 | print(gmm_scipy.score_samples(X)) 89 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import shutil 4 | import numpy as np 5 | from scipy.spatial.transform import Rotation 6 | 7 | def transformation_matrix_to_translation_and_rotation(mat): 8 | rotation_matrix = mat[:3, :3] 9 | translation = mat[:3, 3] 10 | 11 | # rotation matrix to rotation euler 12 | rotation_euler = Rotation.from_matrix(rotation_matrix).as_euler('xyz') 13 | return rotation_euler, translation 14 | 15 | 16 | def get_relative_transform(location1, rotation1, location2, rotation2): 17 | # input: location and rotation in blender coordinate 18 | # out: rotation, translation and transform matrix in OpenCV coordinate 19 | T_world2cv1, R_world2cv1, mat_world2cv1 = get_cv_rt_from_blender(location1, rotation1) 20 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_blender(location2, rotation2) 21 | 22 | mat_cv1_2world = np.linalg.inv(mat_world2cv1) 23 | 24 | mat_cv1_to_cv2 = mat_cv1_2world.dot(mat_world2cv2) 25 | # mat cv1 to cv2 is the coordinate transformation, we need to change it to the object transformation 26 | mat_cv2_to_cv1 = np.linalg.inv(mat_cv1_to_cv2) 27 | 28 | rotation, translation = transformation_matrix_to_translation_and_rotation(mat_cv2_to_cv1) 29 | return rotation, translation, mat_cv2_to_cv1 30 | 31 | 32 | def get_transform_relative_to_base_cv(base_location, base_rotation, location, rotation): 33 | # input: location and rotation in blender coordinate 34 | # out: rotation, translation and transform matrix in OpenCV coordinate 35 | T_world2cv_base, R_world2cv_base, mat_world2cv_base = get_cv_rt_from_cv(base_location, base_rotation) 36 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_cv(location, rotation) 37 | 38 | # mat_cv2world2 = np.linalg.inv(mat_world2cv2) 39 | # location_cv_homo = mat_cv2world2[:, 3] 40 | 41 | location_cv_homo = np.concatenate([location, np.ones(1)]) 42 | 43 | R_cv2_2_base = R_world2cv2.T.dot(R_world2cv_base) 44 | new_rotation_euler = Rotation.from_matrix(R_cv2_2_base).as_euler(seq='xyz') 45 | 46 | new_location_homo = mat_world2cv_base.dot(location_cv_homo) 47 | new_location = new_location_homo[:3] 48 | 49 | return new_location, new_rotation_euler 50 | 51 | def get_transform_relative_to_base_blender(base_location, base_rotation, location, rotation): 52 | T_world2cv_base, R_world2cv_base, mat_world2cv_base = get_cv_rt_from_blender(base_location, base_rotation) 53 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_blender(location, rotation) 54 | 55 | location_cv_homo = np.concatenate([location, 
np.ones(1)]) 56 | 57 | R_cv2_2_base = R_world2cv2.T.dot(R_world2cv_base) 58 | new_rotation_euler = Rotation.from_matrix(R_cv2_2_base).as_euler(seq='xyz') 59 | 60 | new_location_homo = mat_world2cv_base.dot(location_cv_homo) 61 | new_location = new_location_homo[:3] 62 | 63 | return new_location, new_rotation_euler 64 | 65 | # code modified from zaw lin 66 | def get_cv_rt_from_blender(location, rotation): 67 | # bcam stands for blender camera 68 | R_bcam2cv = np.array( 69 | [[1, 0, 0], 70 | [0, -1, 0], 71 | [0, 0, -1]]) 72 | 73 | # Transpose since the rotation is object rotation, 74 | # and we want coordinate rotation 75 | # R_world2bcam = cam.rotation_euler.to_matrix().transposed() 76 | # T_world2bcam = -1*R_world2bcam * location 77 | # 78 | # Use matrix_world instead to account for all constraints 79 | 80 | R_world2bcam = Rotation.from_euler('xyz', rotation, degrees=False).as_matrix().T 81 | 82 | # Convert camera location to translation vector used in coordinate changes 83 | # T_world2bcam = -1*R_world2bcam*cam.location 84 | # Use location from matrix_world to account for constraints: 85 | T_world2bcam = -1 * R_world2bcam.dot(location) 86 | 87 | # Build the coordinate transform matrix from world to computer vision camera 88 | R_world2cv = R_bcam2cv.dot(R_world2bcam) 89 | T_world2cv = R_bcam2cv.dot(T_world2bcam) 90 | 91 | # put into a 4x4 homogeneous matrix 92 | mat = np.array([ 93 | np.concatenate([R_world2cv[0], [T_world2cv[0]]]), 94 | np.concatenate([R_world2cv[1], [T_world2cv[1]]]), 95 | np.concatenate([R_world2cv[2], [T_world2cv[2]]]), 96 | [0, 0, 0, 1] 97 | ]) 98 | return T_world2cv, R_world2cv, mat 99 | 100 | # code modified from zaw lin 101 | def get_cv_rt_from_cv(location, rotation): 102 | 103 | # Transpose since the rotation is object rotation, 104 | # and we want coordinate rotation 105 | # R_world2bcam = cam.rotation_euler.to_matrix().transposed() 106 | # T_world2bcam = -1*R_world2bcam * location 107 | # 108 | # Use matrix_world instead to account for all constraints 109 | 110 | R_world2cv = Rotation.from_euler('xyz', rotation, degrees=False).as_matrix().T 111 | 112 | # Convert camera location to translation vector used in coordinate changes 113 | # T_world2bcam = -1*R_world2bcam*cam.location 114 | # Use location from matrix_world to account for constraints: 115 | T_world2cv = -1 * R_world2cv.dot(location) 116 | 117 | # put into a 4x4 homogeneous matrix 118 | mat = np.array([ 119 | np.concatenate([R_world2cv[0], [T_world2cv[0]]]), 120 | np.concatenate([R_world2cv[1], [T_world2cv[1]]]), 121 | np.concatenate([R_world2cv[2], [T_world2cv[2]]]), 122 | [0, 0, 0, 1] 123 | ]) 124 | return T_world2cv, R_world2cv, mat -------------------------------------------------------------------------------- /networks/make_dataset.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import torch 3 | from torch.utils.data.dataset import Dataset 4 | from torch.utils.data.dataloader import DataLoader 5 | import sys 6 | sys.path.append('..') 7 | from tqdm import tqdm 8 | import pickle 9 | import os 10 | from utils.utils import get_relative_global_pose, get_relative_global_pose_with_camera_matrix, trans_qrot_to_matrix 11 | import numpy as np 12 | import h5py 13 | 14 | 15 | class HDF5Store(object): 16 | """ 17 | Simple class to append values to an HDF5 file on disk (useful for building Keras datasets) 18 | 19 | Params: 20 | datapath: filepath of h5 file 21 | dataset: dataset name within the file 22 | shape: dataset shape (not counting main/batch axis) 23 | dtype: numpy dtype 24 |
25 | Usage: 26 | hdf5_store = HDF5Store('/tmp/hdf5_store.h5','X', shape=(20,20,3)) 27 | x = np.random.random(hdf5_store.shape) 28 | hdf5_store.append(x) 29 | hdf5_store.append(x) 30 | 31 | From https://gist.github.com/wassname/a0a75f133831eed1113d052c67cf8633 32 | """ 33 | 34 | def __init__(self, datapath, dataset_shape_dict: dict, dtype=np.float32): 35 | self.datapath = datapath 36 | self.dataset_shape_dict = dataset_shape_dict 37 | 38 | with h5py.File(self.datapath, mode='w') as h5f: 39 | for dataset_name in self.dataset_shape_dict.keys(): 40 | h5f.create_dataset( 41 | dataset_name, 42 | shape=(0,) + self.dataset_shape_dict[dataset_name], 43 | maxshape=(None,) + self.dataset_shape_dict[dataset_name], 44 | dtype=dtype) 45 | 46 | def append(self, values_batch_dict: dict): 47 | # print('append data!') 48 | with h5py.File(self.datapath, mode='a') as h5f: 49 | for dataset_name, values_batch in values_batch_dict.items(): 50 | batch_size = values_batch.shape[0] 51 | dset = h5f[dataset_name] 52 | dset_len = dset.shape[0] 53 | shape = dset.shape[1:] 54 | dset.resize((dset_len + batch_size,) + shape) 55 | for i in range(dset_len, dset_len + batch_size): 56 | dset[i] = [values_batch[i - dset_len]] 57 | h5f.flush() 58 | 59 | 60 | def main(source_dir, output_path, frame_num, fps, slide_window): 61 | pkl_path_list = os.listdir(source_dir) 62 | dataset_shape_dict = {"relative_global_pose": (frame_num, 15, 3), 63 | "local_pose": (frame_num, 15, 3), 64 | "camera_matrix": (frame_num, 4, 4)} 65 | hdf5_store = HDF5Store(output_path, dataset_shape_dict) 66 | for file_name in tqdm(pkl_path_list): 67 | file_path = os.path.join(source_dir, file_name) 68 | relative_global_pose_seq_list, local_pose_seq_list, camera_matrix_seq_list = \ 69 | get_relative_global_pose_list(file_path, frame_num, fps, slide_window) 70 | values_batch_dict = {"relative_global_pose": relative_global_pose_seq_list, 71 | "local_pose": local_pose_seq_list, 72 | "camera_matrix": camera_matrix_seq_list} 73 | hdf5_store.append(values_batch_dict) 74 | 75 | 76 | def interpolate(original_sequence, interpolate_time=5): 77 | result_list = [] 78 | for i in range(len(original_sequence) - 1): 79 | local_pose_i = original_sequence[i] 80 | local_pose_i_1 = original_sequence[i + 1] 81 | diff = local_pose_i_1 - local_pose_i 82 | res = np.empty(shape=(interpolate_time, ) + local_pose_i.shape) 83 | for j in range(interpolate_time): 84 | res[j] = local_pose_i + diff * j / interpolate_time 85 | result_list.append(res) 86 | return np.concatenate(result_list, axis=0) 87 | 88 | def get_relative_global_pose_list(data_path, frame_num=5, fps=25, slide_window=True): 89 | total_frame_number = frame_num 90 | relative_global_pose_seq_list = [] 91 | 92 | local_pose_seq_list = [] 93 | camera_seq_list = [] 94 | camera_matrix_seq_list = [] 95 | with open(data_path, 'rb') as f: 96 | seq_data = pickle.load(f) 97 | seq_len = len(seq_data['local_pose_list']) 98 | data_frame_rate = int(seq_data['frame_rate']) 99 | frame_rate_timer = round(data_frame_rate / fps) 100 | if slide_window is True: 101 | interval = 1 102 | else: 103 | interval = total_frame_number * frame_rate_timer 104 | for i in range(0, seq_len - total_frame_number * frame_rate_timer, interval): 105 | local_pose_seq_list.append( 106 | seq_data['local_pose_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 107 | camera_seq_list.append( 108 | seq_data['cam_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 109 | for local_pose_seq, camera_seq in zip(local_pose_seq_list, 
camera_seq_list): 110 | relative_global_pose_seq = get_relative_global_pose(local_pose_seq, camera_seq) 111 | camera_matrix_seq = [] 112 | for camera in camera_seq: 113 | camera_matrix = trans_qrot_to_matrix(camera['loc'], camera['rot']) 114 | camera_matrix_seq.append(camera_matrix) 115 | camera_matrix_seq_list.append(camera_matrix_seq) 116 | # select {frame_num} pose from relative global pose list 117 | final_relative_global_pose_seq = [] 118 | for i in range(0, len(relative_global_pose_seq)): 119 | final_relative_global_pose_seq.append(relative_global_pose_seq[i]) 120 | 121 | relative_global_pose_seq_list.append(final_relative_global_pose_seq) 122 | 123 | return np.asarray(relative_global_pose_seq_list), np.asarray(local_pose_seq_list), np.asarray(camera_matrix_seq_list) 124 | 125 | 126 | if __name__ == '__main__': 127 | source_dir = '/home/jianwang/ScanNet/static00/EgocentricAMASS' 128 | 129 | frame_num, fps, slide_window = 10, 25, True 130 | target_file_path = '/home/jianwang/ScanNet/static00/EgocentricAMASSPytorch/local_relative_global_pose_{}_{}.h5'.format(frame_num, fps) 131 | main(source_dir, target_file_path, frame_num, fps, slide_window) -------------------------------------------------------------------------------- /networks/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('..') 4 | import torch 5 | import numpy as np 6 | 7 | from config import args 8 | import datetime 9 | import os 10 | from torch.utils.data import DataLoader 11 | from tqdm import tqdm 12 | from torch.utils.tensorboard import SummaryWriter 13 | from torch.optim import Adam 14 | from pprint import pprint 15 | from models.SeqConvVAE import ConvVAE 16 | from dataset.global_dataset import AMASSDataset 17 | 18 | 19 | def prepare_log_dir(log_dir=None): 20 | # prepare log dirs 21 | if args.log_dir is None and log_dir is None: 22 | log_dir = datetime.datetime.now().strftime('%m.%d-%H:%M:%S') 23 | elif log_dir is None: 24 | log_dir = args.log_dir 25 | log_dir = os.path.join('logs', log_dir) 26 | print('making save dir at: {}'.format(log_dir)) 27 | os.makedirs(log_dir) 28 | checkpoints_dir = os.path.join(log_dir, 'checkpoints') 29 | tensorboard_dir = os.path.join(log_dir, 'tensorboard') 30 | os.makedirs(checkpoints_dir) 31 | os.makedirs(tensorboard_dir) 32 | return checkpoints_dir, tensorboard_dir 33 | 34 | 35 | class Train: 36 | def __init__(self): 37 | 38 | self.seq_length = args.seq_length 39 | self.attention = args.attention 40 | 41 | self.amass_train_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=True, 42 | windows_size=args.slide_window_step, balance_distrib=args.data_balance, 43 | with_mo2cap2_data=args.with_mo2cap2_data) 44 | self.train_dataloader = DataLoader(dataset=self.amass_train_dataset, batch_size=args.batch_size, shuffle=True, 45 | drop_last=True, num_workers=args.num_workers) 46 | 47 | self.test_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=False, 48 | windows_size=args.slide_window_step, balance_distrib=args.data_balance, 49 | with_mo2cap2_data=args.with_mo2cap2_data) 50 | self.test_dataloader = DataLoader(self.test_dataset, batch_size=args.batch_size, shuffle=False, 51 | drop_last=False, num_workers=args.num_workers) 52 | 53 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 54 | seq_len=self.seq_length) 55 | 56 | self.optimizer = Adam(params=self.network.parameters(), lr=args.learning_rate, 
weight_decay=args.weight_decay) 57 | 58 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 59 | 60 | self.network = self.network.to(self.device) 61 | 62 | self.checkpoint_dir, tensorboard_dir = prepare_log_dir() 63 | self.tensorboard_writer = SummaryWriter(tensorboard_dir) 64 | 65 | def train(self): 66 | print('---------------------Start Training-----------------------') 67 | pprint(args.__dict__) 68 | running_loss = 0 69 | running_recon_loss = 0 70 | count = 0 71 | for e in range(args.epoch): 72 | self.network.train() 73 | print('-----------------Epoch: {}-----------------'.format(str(e))) 74 | for i, relative_global_pose in tqdm(enumerate(self.train_dataloader)): 75 | self.optimizer.zero_grad() 76 | relative_global_pose = relative_global_pose.to(self.device) 77 | 78 | pose_preds, pose_input, mu, log_var = self.network(relative_global_pose) 79 | loss, recon_loss, kld_loss = self.network.loss_function(pose_preds, pose_input, mu, log_var, 80 | M_N=args.kl_weight * args.batch_size / len( 81 | self.amass_train_dataset)) 82 | loss.backward() 83 | self.optimizer.step() 84 | 85 | running_loss += loss.item() 86 | running_recon_loss += recon_loss.item() 87 | # logs 88 | if count % args.log_step == 0 and count != 0: 89 | # ...log the running loss 90 | self.tensorboard_writer.add_scalar('training loss', running_loss, count) 91 | self.tensorboard_writer.add_scalar('training recon loss', running_recon_loss, count) 92 | print("running loss is: {}".format(running_loss)) 93 | print("running recon loss is: {}".format(running_recon_loss)) 94 | running_loss = 0 95 | running_recon_loss = 0 96 | count += 1 97 | # log the logs 98 | eval_loss = self.eval() 99 | self.tensorboard_writer.add_scalar('eval loss', eval_loss, e) 100 | print('eval loss is: {}'.format(eval_loss)) 101 | 102 | torch.save({ 103 | 'epoch': e + 1, 104 | 'args': args.__dict__, 105 | 'state_dict': self.network.state_dict(), 106 | 'eval_result': eval_loss, 107 | 'optimizer': self.optimizer.state_dict(), 108 | }, os.path.join(self.checkpoint_dir, str(e) + '.pth.tar')) 109 | 110 | def eval(self): 111 | print('---------------------Start Eval-----------------------') 112 | self.network.eval() 113 | mpjpe_list = [] 114 | with torch.no_grad(): 115 | for i, relative_global_pose in tqdm(enumerate(self.test_dataloader)): 116 | relative_global_pose = relative_global_pose.to(self.device) 117 | pose_preds, pose_input, _, _ = self.network(relative_global_pose) 118 | pose_preds = pose_preds.cpu().numpy() 119 | pose_input = pose_input.cpu().numpy() 120 | pose_preds = np.reshape(pose_preds, [-1, self.seq_length, 15, 3]) 121 | pose_gt = np.reshape(pose_input, [-1, self.seq_length, 15, 3]) 122 | mpjpe_list.append(self.mpjpe(pose_preds, pose_gt)) 123 | 124 | print('MPJPE is: {}'.format(np.mean(mpjpe_list))) 125 | return np.mean(mpjpe_list) 126 | 127 | def mpjpe(self, pose_preds, pose_gt): 128 | distance = np.linalg.norm(pose_gt - pose_preds, axis=3) 129 | return np.mean(distance) 130 | 131 | 132 | if __name__ == '__main__': 133 | train = Train() 134 | train.train() 135 | -------------------------------------------------------------------------------- /utils/fisheye/FishEyeCalibrated.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | from copy import deepcopy 5 | 6 | class FishEyeCameraCalibrated: 7 | def __init__(self, calibration_file_path, use_gpu=False): 8 | with open(calibration_file_path) as f: 9 | calibration_data = 
json.load(f) 10 | self.intrinsic = np.array(calibration_data['intrinsic']) 11 | self.img_size = np.array(calibration_data['size']) # w, h 12 | self.fisheye_polynomial = np.array(calibration_data['polynomialC2W']) 13 | self.fisheye_inverse_polynomial = np.array(calibration_data['polynomialW2C']) 14 | self.img_center = np.array([self.intrinsic[0][2], self.intrinsic[1][2]]) 15 | self.use_gpu = use_gpu 16 | # self.img_center = np.array([self.img_size[0] / 2, self.img_size[1] / 2]) 17 | 18 | def camera2world(self, point: np.ndarray, depth: np.ndarray): 19 | """ 20 | point: np.ndarray of 2D points on image (n * 2) 21 | depth: np.ndarray of depth of every 2D points (n) 22 | """ 23 | depth = depth.astype(np.float) 24 | point_centered = point.astype(np.float) - self.img_center 25 | x = point_centered[:, 0] 26 | y = point_centered[:, 1] 27 | distance_from_center = np.sqrt(np.square(x) + np.square(y)) 28 | 29 | z = np.polyval(p=self.fisheye_polynomial[::-1], x=distance_from_center) 30 | point_3d = np.array([x, y, -z]) # 3, n 31 | norm = np.linalg.norm(point_3d, axis=0) 32 | point_3d = point_3d / norm * depth 33 | return point_3d.transpose() 34 | 35 | def getPolyVal(self, p, x): 36 | curVal = torch.zeros_like(x) 37 | for curValIndex in range(len(p) - 1): 38 | curVal = (curVal + p[curValIndex]) * x 39 | return curVal + p[len(p) - 1] 40 | 41 | def camera2world_pytorch(self, point: torch.Tensor, depth: torch.Tensor): 42 | """ 43 | point: np.ndarray of 2D points on image (n * 2) 44 | depth: np.ndarray of depth of every 2D points (n) 45 | """ 46 | point_centered = point.float() - self.img_center 47 | x = point_centered[:, 0] 48 | y = point_centered[:, 1] 49 | distance_from_center = torch.sqrt(torch.square(x) + torch.square(y)) 50 | 51 | z = self.getPolyVal(p=self.fisheye_polynomial[::-1], x=distance_from_center) 52 | point_3d = torch.stack([x, y, -z]).float() # 3, n 53 | norm = torch.norm(point_3d, dim=0) 54 | point_3d = point_3d / norm * depth.float() 55 | return point_3d.t() 56 | 57 | def world2camera(self, point3D): 58 | point3D = deepcopy(point3D) 59 | point3D[:, 2] = point3D[:, 2] * -1 60 | point3D = point3D.T 61 | xc, yc = self.img_center[0], self.img_center[1] 62 | point2D = [] 63 | 64 | norm = np.linalg.norm(point3D[:2], axis=0) 65 | 66 | if (norm != 0).all(): 67 | theta = np.arctan(point3D[2] / norm) 68 | invnorm = 1.0 / norm 69 | t = theta 70 | rho = self.fisheye_inverse_polynomial[0] 71 | t_i = 1.0 72 | 73 | for i in range(1, len(self.fisheye_inverse_polynomial)): 74 | t_i *= t 75 | rho += t_i * self.fisheye_inverse_polynomial[i] 76 | 77 | x = point3D[0] * invnorm * rho 78 | y = point3D[1] * invnorm * rho 79 | 80 | point2D.append(x + xc) 81 | point2D.append(y + yc) 82 | else: 83 | point2D.append(xc) 84 | point2D.append(yc) 85 | raise Exception("norm is zero!") 86 | 87 | return np.asarray(point2D).T 88 | 89 | def world2camera_with_depth(self, point3D): 90 | point3D_cloned = deepcopy(point3D) 91 | point2D = self.world2camera(point3D_cloned) 92 | 93 | depth = np.linalg.norm(point3D, axis=-1) 94 | return point2D, depth 95 | 96 | def world2camera_pytorch(self, point3d_original: torch.Tensor): 97 | fisheye_inv_polynomial = self.fisheye_inverse_polynomial 98 | point3d = point3d_original.clone() 99 | point3d[:, 2] = point3d_original[:, 2] * -1 100 | point3d = point3d.transpose(0, 1) 101 | xc, yc = self.img_center[0], self.img_center[1] 102 | xc = torch.Tensor([xc]).float().to(point3d.device) 103 | yc = torch.Tensor([yc]).float().to(point3d.device) 104 | point2d = torch.empty((2, 
point3d.shape[-1])).to(point3d.device) 105 | 106 | norm = torch.norm(point3d[:2], dim=0) 107 | 108 | if (norm != 0).all(): 109 | theta = torch.atan(point3d[2] / norm) 110 | invnorm = 1.0 / norm 111 | t = theta 112 | rho = fisheye_inv_polynomial[0] 113 | t_i = 1.0 114 | 115 | for i in range(1, len(fisheye_inv_polynomial)): 116 | t_i *= t 117 | rho += t_i * fisheye_inv_polynomial[i] 118 | 119 | x = point3d[0] * invnorm * rho 120 | y = point3d[1] * invnorm * rho 121 | 122 | point2d[0] = x + xc 123 | point2d[1] = y + yc 124 | else: 125 | point2d[0] = xc 126 | point2d[1] = yc 127 | raise Exception("norm is zero!") 128 | 129 | return point2d.transpose(0, 1) 130 | 131 | def undistort(self, point_2Ds): 132 | """ 133 | undistort the input 2d points in fisheye camera 134 | """ 135 | point_length = point_2Ds.shape[0] 136 | depths = np.ones(shape=point_length) 137 | point_3Ds = self.camera2world(point_2Ds, depths) 138 | 139 | # point_3Ds_homo = np.ones((point_length, 4)) 140 | # point_3Ds_homo[:, :3] = point_3Ds 141 | 142 | projected_2d_points = (self.intrinsic[:3, :3] @ point_3Ds.T).T 143 | projected_2d_points = projected_2d_points[:, :2] / projected_2d_points[:, 2:] 144 | return projected_2d_points 145 | 146 | 147 | 148 | 149 | if __name__ == '__main__': 150 | camera = FishEyeCameraCalibrated('fisheye.calibration.json') 151 | point = np.array([[660, 520], [520, 660], [123, 456]]) 152 | depth = np.array([30, 30, 40]) 153 | point = torch.from_numpy(point) 154 | depth = torch.from_numpy(depth) 155 | point3d = camera.camera2world_pytorch(point, depth) 156 | print(point3d) 157 | 158 | # reprojected_point_2d = camera.world2camera(point3d) 159 | # print(reprojected_point_2d) 160 | 161 | reprojected_point_2d = camera.world2camera(point3d.numpy()) 162 | print(reprojected_point_2d) 163 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/fisheye/FishEyeCalibrated.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | from copy import deepcopy 5 | 6 | class FishEyeCameraCalibrated: 7 | def __init__(self, calibration_file_path, use_gpu=False): 8 | with open(calibration_file_path) as f: 9 | calibration_data = json.load(f) 10 | self.intrinsic = np.array(calibration_data['intrinsic']) 11 | self.img_size = np.array(calibration_data['size']) # w, h 12 | self.fisheye_polynomial = np.array(calibration_data['polynomialC2W']) 13 | self.fisheye_inverse_polynomial = np.array(calibration_data['polynomialW2C']) 14 | self.img_center = np.array([self.intrinsic[0][2], self.intrinsic[1][2]]) 15 | self.use_gpu = use_gpu 16 | # self.img_center = np.array([self.img_size[0] / 2, self.img_size[1] / 2]) 17 | 18 | def camera2world(self, point: np.ndarray, depth: np.ndarray): 19 | """ 20 | point: np.ndarray of 2D points on image (n * 2) 21 | depth: np.ndarray of depth of every 2D points (n) 22 | """ 23 | depth = depth.astype(np.float) 24 | point_centered = point.astype(np.float) - self.img_center 25 | x = point_centered[:, 0] 26 | y = point_centered[:, 1] 27 | distance_from_center = np.sqrt(np.square(x) + np.square(y)) 28 | 29 | z = np.polyval(p=self.fisheye_polynomial[::-1], x=distance_from_center) 30 | point_3d = np.array([x, y, -z]) # 3, n 31 | norm = np.linalg.norm(point_3d, axis=0) 32 | point_3d = point_3d / norm * depth 33 | return point_3d.transpose() 34 | 35 | def getPolyVal(self, p, x): 36 | curVal = torch.zeros_like(x) 37 | for curValIndex in range(len(p) - 1): 38 | curVal = 
(curVal + p[curValIndex]) * x 39 | return curVal + p[len(p) - 1] 40 | 41 | def camera2world_pytorch(self, point: torch.Tensor, depth: torch.Tensor): 42 | """ 43 | point: np.ndarray of 2D points on image (n * 2) 44 | depth: np.ndarray of depth of every 2D points (n) 45 | """ 46 | point_centered = point.float() - self.img_center 47 | x = point_centered[:, 0] 48 | y = point_centered[:, 1] 49 | distance_from_center = torch.sqrt(torch.square(x) + torch.square(y)) 50 | 51 | z = self.getPolyVal(p=self.fisheye_polynomial[::-1], x=distance_from_center) 52 | point_3d = torch.stack([x, y, -z]).float() # 3, n 53 | norm = torch.norm(point_3d, dim=0) 54 | point_3d = point_3d / norm * depth.float() 55 | return point_3d.t() 56 | 57 | def world2camera(self, point3D): 58 | point3D = deepcopy(point3D) 59 | point3D[:, 2] = point3D[:, 2] * -1 60 | point3D = point3D.T 61 | xc, yc = self.img_center[0], self.img_center[1] 62 | point2D = [] 63 | 64 | norm = np.linalg.norm(point3D[:2], axis=0) 65 | 66 | if (norm != 0).all(): 67 | theta = np.arctan(point3D[2] / norm) 68 | invnorm = 1.0 / norm 69 | t = theta 70 | rho = self.fisheye_inverse_polynomial[0] 71 | t_i = 1.0 72 | 73 | for i in range(1, len(self.fisheye_inverse_polynomial)): 74 | t_i *= t 75 | rho += t_i * self.fisheye_inverse_polynomial[i] 76 | 77 | x = point3D[0] * invnorm * rho 78 | y = point3D[1] * invnorm * rho 79 | 80 | point2D.append(x + xc) 81 | point2D.append(y + yc) 82 | else: 83 | point2D.append(xc) 84 | point2D.append(yc) 85 | raise Exception("norm is zero!") 86 | 87 | return np.asarray(point2D).T 88 | 89 | def world2camera_with_depth(self, point3D): 90 | point3D_cloned = deepcopy(point3D) 91 | point2D = self.world2camera(point3D_cloned) 92 | 93 | depth = np.linalg.norm(point3D, axis=-1) 94 | return point2D, depth 95 | 96 | def world2camera_pytorch(self, point3d_original: torch.Tensor): 97 | fisheye_inv_polynomial = self.fisheye_inverse_polynomial 98 | point3d = point3d_original.clone() 99 | point3d[:, 2] = point3d_original[:, 2] * -1 100 | point3d = point3d.transpose(0, 1) 101 | xc, yc = self.img_center[0], self.img_center[1] 102 | xc = torch.Tensor([xc]).float().to(point3d.device) 103 | yc = torch.Tensor([yc]).float().to(point3d.device) 104 | point2d = torch.empty((2, point3d.shape[-1])).to(point3d.device) 105 | 106 | norm = torch.norm(point3d[:2], dim=0) 107 | 108 | if (norm != 0).all(): 109 | theta = torch.atan(point3d[2] / norm) 110 | invnorm = 1.0 / norm 111 | t = theta 112 | rho = fisheye_inv_polynomial[0] 113 | t_i = 1.0 114 | 115 | for i in range(1, len(fisheye_inv_polynomial)): 116 | t_i *= t 117 | rho += t_i * fisheye_inv_polynomial[i] 118 | 119 | x = point3d[0] * invnorm * rho 120 | y = point3d[1] * invnorm * rho 121 | 122 | point2d[0] = x + xc 123 | point2d[1] = y + yc 124 | else: 125 | point2d[0] = xc 126 | point2d[1] = yc 127 | raise Exception("norm is zero!") 128 | 129 | return point2d.transpose(0, 1) 130 | 131 | def undistort(self, point_2Ds): 132 | """ 133 | undistort the input 2d points in fisheye camera 134 | """ 135 | point_length = point_2Ds.shape[0] 136 | depths = np.ones(shape=point_length) 137 | point_3Ds = self.camera2world(point_2Ds, depths) 138 | 139 | # point_3Ds_homo = np.ones((point_length, 4)) 140 | # point_3Ds_homo[:, :3] = point_3Ds 141 | 142 | projected_2d_points = (self.intrinsic[:3, :3] @ point_3Ds.T).T 143 | projected_2d_points = projected_2d_points[:, :2] / projected_2d_points[:, 2:] 144 | return projected_2d_points 145 | 146 | 147 | 148 | 149 | if __name__ == '__main__': 150 | camera = 
FishEyeCameraCalibrated('fisheye.calibration.json') 151 | point = np.array([[660, 520], [520, 660], [123, 456]]) 152 | depth = np.array([30, 30, 40]) 153 | point = torch.from_numpy(point) 154 | depth = torch.from_numpy(depth) 155 | point3d = camera.camera2world_pytorch(point, depth) 156 | print(point3d) 157 | 158 | # reprojected_point_2d = camera.world2camera(point3d) 159 | # print(reprojected_point_2d) 160 | 161 | reprojected_point_2d = camera.world2camera(point3d.numpy()) 162 | print(reprojected_point_2d) 163 | -------------------------------------------------------------------------------- /networks/interpolant.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import sys 3 | 4 | sys.path.append('..') 5 | import torch 6 | from models.SeqConvVAE import ConvVAE 7 | import pickle 8 | from utils.skeleton import Skeleton 9 | import os 10 | from tqdm import tqdm 11 | from torch.utils.data import DataLoader 12 | import numpy as np 13 | from torch.utils.data import Dataset 14 | from old_dataset import AMASSDataset 15 | from utils.utils import transform_pose 16 | from utils.rigid_transform_with_scale import umeyama 17 | 18 | 19 | def calculate_error(estimated_seq, gt_seq): 20 | estimated_seq = np.asarray(estimated_seq) 21 | gt_seq = np.asarray(gt_seq) 22 | distance = estimated_seq - gt_seq 23 | distance = np.linalg.norm(distance, axis=2) 24 | m_distance = np.mean(distance) 25 | return m_distance 26 | 27 | 28 | def global_align_skeleton_seq(estimated_seq, gt_seq): 29 | estimated_seq = np.asarray(estimated_seq).reshape((-1, 3)) 30 | gt_seq = np.asarray(gt_seq).reshape(-1, 3) 31 | # aligned_pose_list = np.zeros_like(estimated_seq) 32 | # for s in range(estimated_seq.shape[0]): 33 | # pose_p = estimated_seq[s] 34 | # pose_gt_bs = gt_seq[s] 35 | c, R, t = umeyama(estimated_seq, gt_seq) 36 | pose_p = estimated_seq.dot(R) * c + t 37 | # aligned_pose_list[s] = pose_p 38 | 39 | return pose_p.reshape((-1, 15, 3)) 40 | 41 | 42 | def align_skeleton(estimated_seq, gt_seq): 43 | estimated_seq = np.asarray(estimated_seq) 44 | gt_seq = np.asarray(gt_seq) 45 | aligned_pose_list = np.zeros_like(estimated_seq) 46 | for s in range(estimated_seq.shape[0]): 47 | pose_p = estimated_seq[s] 48 | pose_gt_bs = gt_seq[s] 49 | c, R, t = umeyama(pose_p, pose_gt_bs) 50 | pose_p = pose_p.dot(R) * c + t 51 | aligned_pose_list[s] = pose_p 52 | 53 | return aligned_pose_list 54 | 55 | class InterpolateDataset(Dataset): 56 | def __init__(self, pkl_path): 57 | with open(pkl_path, 'rb') as f: 58 | self.data = pickle.load(f) 59 | 60 | def __len__(self): 61 | return len(self.data) 62 | 63 | def __getitem__(self, i): 64 | data_i = self.data[i] 65 | data_i = data_i.reshape((-1, 45)) 66 | data_i = torch.from_numpy(data_i).float() 67 | return data_i 68 | 69 | class Interpolate: 70 | def __init__(self, pkl_path, network_path, out_path, seq_length=20, windows_size=5, latent_dim=128): 71 | self.seq_length = seq_length 72 | self.out_path = out_path 73 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=latent_dim, seq_len=self.seq_length) 74 | state_dict = torch.load(network_path)['state_dict'] 75 | self.network.load_state_dict(state_dict) 76 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 77 | self.network = self.network.to(self.device) 78 | 79 | self.skeleton_model = Skeleton(None) 80 | 81 | self.test_dataset = InterpolateDataset(pkl_path=pkl_path) 82 | 83 | def save_sample(self, sample, out_path): 84 | skeleton_list = 
self.skeleton_model.joint_list_2_mesh_list(sample) 85 | for i, skeleton in enumerate(skeleton_list): 86 | open3d.io.write_triangle_mesh(os.path.join(out_path, 'out_%04d.ply' % i), skeleton) 87 | 88 | def show_example(self, sample): 89 | skeleton_list = self.skeleton_model.joint_list_2_mesh_list(sample) 90 | coor = open3d.geometry.TriangleMesh.create_coordinate_frame() 91 | open3d.visualization.draw_geometries(skeleton_list + [coor]) 92 | 93 | 94 | def test(self, i, j, visualization=False, save=False): 95 | print('---------------------Start Eval-----------------------') 96 | self.network.eval() 97 | with torch.no_grad(): 98 | seq_i = self.test_dataset[i].unsqueeze(0).to(self.device) 99 | seq_j = self.test_dataset[j].unsqueeze(0).to(self.device) 100 | 101 | mu_i, std_i, z_i = self.network.get_latent_space(seq_i) 102 | mu_j, std_j, z_j = self.network.get_latent_space(seq_j) 103 | 104 | first_sampled_seq = self.network.decode(z_i).permute((0, 2, 1))[0] 105 | first_sampled_seq = first_sampled_seq.cpu().detach_().numpy() 106 | first_sampled_seq = first_sampled_seq.reshape((self.seq_length, 15, 3)) 107 | 108 | second_sampled_seq = self.network.decode(z_j).permute((0, 2, 1))[0] 109 | second_sampled_seq = second_sampled_seq.cpu().detach_().numpy() 110 | second_sampled_seq = second_sampled_seq.reshape((self.seq_length, 15, 3)) 111 | 112 | if visualization is True: 113 | self.show_example(first_sampled_seq) 114 | self.show_example(second_sampled_seq) 115 | if save is True: 116 | for i in range(6): 117 | out_dir = os.path.join(self.out_path, str(i)) 118 | if not os.path.isdir(out_dir): 119 | os.makedirs(out_dir) 120 | self.save_sample(first_sampled_seq, os.path.join(self.out_path, "0")) 121 | self.save_sample(second_sampled_seq, os.path.join(self.out_path, "5")) 122 | 123 | first_z = z_i[0].cpu().detach_().numpy() 124 | second_z = z_j[0].cpu().detach_().numpy() 125 | 126 | interpolant_list = [first_z + (i / 5.) 
* (second_z - first_z) for i in range(1, 5)] 127 | interpolant_list = torch.from_numpy(np.asarray(interpolant_list)).float() 128 | interpolant_list = interpolant_list.to(self.device) 129 | 130 | sampled_result = self.network.decode(interpolant_list) 131 | sampled_result = sampled_result.permute((0, 2, 1)) 132 | sampled_result = sampled_result.cpu().detach_().numpy() 133 | sampled_result = sampled_result.reshape((-1, self.seq_length, 15, 3)) 134 | for i, sample in enumerate(sampled_result): 135 | if visualization is True: 136 | self.show_example(sample) 137 | if save is True: 138 | self.save_sample(sample, os.path.join(self.out_path, str(i + 1))) 139 | 140 | 141 | if __name__ == '__main__': 142 | pkl_path = r'../AMASSDataConverter/pkl_data/data_nframe_20_slidewindow_False.pkl' 143 | sampler = Interpolate(pkl_path=pkl_path, 144 | network_path=r'\\winfs-inf\HPS\Mo2Cap2Plus1\work\BodyPoseOptimizer\networks\logs/2_full_dataset_latent_2048_len_20_slide_window_step_1_kl_0.5/checkpoints/14.pth.tar', 145 | out_path='out/interpolate2', seq_length=20, latent_dim=2048, windows_size=1) 146 | for start, end in [(1770, 1300)]: 147 | print('process: {}'.format((start, end))) 148 | sampler.out_path = './interpolate_{}_{}'.format(start, end) 149 | sampler.test(int(start), int(end), visualization=False, save=True) 150 | -------------------------------------------------------------------------------- /optimize_whole_sequence.py: -------------------------------------------------------------------------------- 1 | from optimizer import main 2 | import numpy as np 3 | from natsort import natsorted 4 | 5 | if __name__ == '__main__': 6 | import argparse 7 | import os 8 | 9 | parser = argparse.ArgumentParser(description='Data directory number') 10 | parser.add_argument('--data_path', required=True, type=str) 11 | parser.add_argument('--camera', required=False, type=str, 12 | default='utils/fisheye/fisheye.calibration.json') 13 | 14 | parser.add_argument('--vae', required=False, type=float, default=0.00) 15 | parser.add_argument('--gmm', required=False, type=float, default=0.00) 16 | parser.add_argument('--smooth', required=False, type=float, default=0.001) 17 | parser.add_argument('--bone_length', required=False, type=float, default=0.01) 18 | parser.add_argument('--weight_3d', required=False, type=float, default=0.01) 19 | parser.add_argument('--reproj_weight', required=False, type=float, default=0.01) 20 | parser.add_argument('--save', required=False, default=False, type=lambda x: (str(x).lower() == 'true')) 21 | parser.add_argument('--final_smooth', required=False, default=True, type=lambda x: (str(x).lower() == 'true')) 22 | parser.add_argument('--merge', required=False, default=True, type=lambda x: (str(x).lower() == 'true')) 23 | args = parser.parse_args() 24 | 25 | data_dir = args.data_path 26 | 27 | estimated_pose = [] 28 | optimized_pose = [] 29 | gt_pose = [] 30 | 31 | original_global_mpjpe = [] 32 | mid_global_mpjpe = [] 33 | optimized_global_mpjpe = [] 34 | original_camera_pos_error = [] 35 | optimized_camera_pos_error = [] 36 | original_aligned_camera_pos_error = [] 37 | optimized_aligned_camera_pos_error = [] 38 | original_aligned_global_mpjpe = [] 39 | aligned_mid_seq_mpjpe = [] 40 | optimized_aligned_global_mpjpe = [] 41 | aligned_original_mpjpe = [] 42 | aligned_mid_optimized_mpjpe = [] 43 | aligned_optimized_mpjpe = [] 44 | bone_length_aligned_original_mpjpe = [] 45 | bone_length_aligned_mid_optimized_mpjpe = [] 46 | bone_length_aligned_optimized_mpjpe = [] 47 | joints_error = [] 48 | for 
data_path_name in natsorted(os.listdir(data_dir)): 49 | data_path = os.path.join(data_dir, data_path_name) 50 | print("running data: {}".format(data_path)) 51 | if not os.path.isdir(data_path): 52 | continue 53 | bone_length = args.bone_length 54 | reproj_weight = args.reproj_weight 55 | res, estimated_pose_seq, mid_local_pose_seq, optimized_pose_seq, gt_pose_seq = main(data_path, 56 | camera_model_path=args.camera, 57 | vae_weight=args.vae, gmm_weight=args.gmm, 58 | smoothness_weight=args.smooth, 59 | visualization=False, save=args.save, 60 | bone_length_weight=bone_length, 61 | weight_3d=args.weight_3d, 62 | reproj_weight=reproj_weight, merge=args.merge, 63 | final_smooth=args.final_smooth) 64 | 65 | estimated_pose.extend(estimated_pose_seq) 66 | optimized_pose.extend(optimized_pose_seq) 67 | gt_pose.extend(gt_pose_seq) 68 | 69 | original_global_mpjpe.append(res['original_global_mpjpe']) 70 | mid_global_mpjpe.append(res['mid_global_mpjpe']) 71 | optimized_global_mpjpe.append(res['optimized_global_mpjpe']) 72 | original_camera_pos_error.append(res['original_camera_pos_error']) 73 | optimized_camera_pos_error.append(res['optimized_camera_pos_error']) 74 | original_aligned_camera_pos_error.append(res['original_aligned_camera_pos_error']) 75 | optimized_aligned_camera_pos_error.append(res['optimized_aligned_camera_pos_error']) 76 | original_aligned_global_mpjpe.append(res['original_aligned_global_mpjpe']) 77 | aligned_mid_seq_mpjpe.append(res['aligned_mid_seq_mpjpe']) 78 | optimized_aligned_global_mpjpe.append(res['optimized_aligned_global_mpjpe']) 79 | aligned_original_mpjpe.append(res['aligned_original_mpjpe']) 80 | aligned_mid_optimized_mpjpe.append(res['aligned_mid_optimized_mpjpe']) 81 | aligned_optimized_mpjpe.append(res['aligned_optimized_mpjpe']) 82 | bone_length_aligned_original_mpjpe.append(res['bone_length_aligned_original_mpjpe']) 83 | bone_length_aligned_mid_optimized_mpjpe.append(res['bone_length_aligned_mid_optimized_mpjpe']) 84 | bone_length_aligned_optimized_mpjpe.append(res['bone_length_aligned_optimized_mpjpe']) 85 | joints_error.append(res['joints_error']) 86 | 87 | if res['bone_length_aligned_optimized_mpjpe'] > res['bone_length_aligned_mid_optimized_mpjpe']: 88 | print(res) 89 | 90 | print('Average original global pose mpjpe: {}'.format(np.average(original_global_mpjpe))) 91 | print('Average mid global pose mpjpe: {}'.format(np.average(mid_global_mpjpe))) 92 | print('Average optimized global pose mpjpe: {}'.format(np.average(optimized_global_mpjpe))) 93 | print('-----------------------------------------') 94 | print('Average original cam pose error: {}'.format(np.average(original_camera_pos_error))) 95 | print('Average optimized cam pose error: {}'.format(np.average(optimized_camera_pos_error))) 96 | print('-----------------------------------------') 97 | print('Average original aligned cam pose error: {}'.format(np.average(original_aligned_camera_pos_error))) 98 | print('Average optimized aligned cam pose error: {}'.format(np.average(optimized_aligned_camera_pos_error))) 99 | print('-----------------------------------------') 100 | print('Average original_aligned_global_mpjpe: {}'.format(np.average(original_aligned_global_mpjpe))) 101 | print('Average aligned_mid_seq_mpjpe: {}'.format(np.average(aligned_mid_seq_mpjpe))) 102 | print('Average optimized_aligned_global_mpjpe: {}'.format(np.average(optimized_aligned_global_mpjpe))) 103 | print('-----------------------------------------') 104 | print('Average aligned original global pose mpjpe: 
{}'.format(np.average(aligned_original_mpjpe))) 105 | print('Average aligned mid local pose mpjpe: {}'.format(np.average(aligned_mid_optimized_mpjpe))) 106 | print('Average aligned optimized global pose mpjpe: {}'.format(np.average(aligned_optimized_mpjpe))) 107 | print('-----------------------------------------') 108 | print('Average bone length aligned original global pose mpjpe: {}'.format( 109 | np.average(bone_length_aligned_original_mpjpe))) 110 | print('Average bone length aligned mid local pose mpjpe: {}'.format( 111 | np.average(bone_length_aligned_mid_optimized_mpjpe))) 112 | print('Average bone length aligned optimized global pose mpjpe: {}'.format( 113 | np.average(bone_length_aligned_optimized_mpjpe))) 114 | print('-----------------------------------------') 115 | print('joints error is: {}'.format(np.mean(joints_error, axis=0))) 116 | 117 | print('-------------------------------------------------------------') 118 | -------------------------------------------------------------------------------- /networks/dataset/local_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../..') 3 | import open3d 4 | 5 | import torch 6 | from torch.utils.data.dataset import Dataset 7 | from torch.utils.data.dataloader import DataLoader 8 | from tqdm import tqdm 9 | import pickle 10 | import os 11 | from utils.utils import get_relative_global_pose_with_camera_matrix 12 | import numpy as np 13 | 14 | 15 | class AMASSDataset(Dataset): 16 | def __init__(self, data_path, frame_num, windows_size=1, is_train=True, fps=25, slide_window=True, 17 | balance_distrib=False, with_mo2cap2_data=False): 18 | """ 19 | 20 | :param data_path: directory with pkls containing data 21 | :param frame_num: length of sequence 22 | :param windows_size: one frame from n frame 23 | :param is_train: 24 | :param fps: the fps of test data 25 | """ 26 | self.windows_size = windows_size 27 | self.is_train = is_train 28 | self.slide_window = slide_window 29 | self.frame_num = frame_num 30 | self.balance_distrib = balance_distrib 31 | self.with_mo2cap2_data = with_mo2cap2_data 32 | self.data_list = self.load_pkls(data_path, is_train, self.with_mo2cap2_data, self.balance_distrib) 33 | self.local_pose_seq_list = self.get_local_pose_list(self.data_list, frame_num=self.frame_num, 34 | windows_size=self.windows_size, 35 | fps=fps) 36 | # with open(output_path, 'wb') as f: 37 | # pickle.dump(self.relative_pose_seq_list, f, protocol=4) 38 | 39 | def __getitem__(self, i): 40 | data_i = self.local_pose_seq_list[i] 41 | data_i = data_i.reshape((-1, 45)) 42 | data_i = torch.from_numpy(data_i).float() 43 | return data_i 44 | 45 | def __len__(self): 46 | return len(self.local_pose_seq_list) 47 | 48 | def load_pkls(self, path, is_train, with_mo2cap2_data=False, balance_distrib=False): 49 | data_list = [] 50 | raw_pkl_path_list = os.listdir(path) 51 | if with_mo2cap2_data is True: 52 | print('use only mo2cap2 motion!') 53 | seq_names = np.load('/HPS/ScanNet/static00/SURREAL/smpl_data/seq_names.npy').tolist() 54 | pkl_path_list = [] 55 | for seq_name in seq_names: 56 | for pkl_path in raw_pkl_path_list: 57 | if seq_name in pkl_path: 58 | pkl_path_list.append(pkl_path) 59 | else: 60 | pkl_path_list = raw_pkl_path_list 61 | print(len(pkl_path_list)) 62 | # pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'CMU' in pkl_path] 63 | if is_train: 64 | pkl_path_list = pkl_path_list[:-10] 65 | else: 66 | pkl_path_list = pkl_path_list[-10:] 67 | 68 | if balance_distrib 
is True: 69 | print('use balanced distribution') 70 | walking_pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'walk' in pkl_path.lower()] 71 | non_walking_pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'walk' not in pkl_path.lower()] 72 | np.random.shuffle(walking_pkl_path_list) 73 | walking_num = int(1 / 20 * len(non_walking_pkl_path_list)) 74 | pkl_path_list = non_walking_pkl_path_list + walking_pkl_path_list[:walking_num] 75 | for file_name in tqdm(pkl_path_list): 76 | file_path = os.path.join(path, file_name) 77 | with open(file_path, 'rb') as f: 78 | data = pickle.load(f) 79 | data_list.append(data) 80 | return data_list 81 | 82 | def get_local_pose_list(self, data_list, frame_num=10, windows_size=1, fps=30): 83 | total_frame_number = frame_num * windows_size 84 | 85 | local_pose_seq_list = [] 86 | for seq_data in tqdm(data_list): 87 | seq_len = len(seq_data['local_pose_list']) 88 | data_frame_rate = int(seq_data['frame_rate']) 89 | frame_rate_timer = round(data_frame_rate / fps) 90 | if self.slide_window is True: 91 | interval = 1 92 | else: 93 | interval = total_frame_number * frame_rate_timer 94 | for i in range(0, seq_len - total_frame_number * frame_rate_timer, interval): 95 | local_pose_seq_list.append( 96 | seq_data['local_pose_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 97 | 98 | return np.asarray(local_pose_seq_list) 99 | 100 | 101 | class Mo2Cap2Dataset(Dataset): 102 | def __init__(self, pkl_path, frame_num=5, slide_window=False): 103 | self.frame_num = frame_num 104 | with open(pkl_path, 'rb') as f: 105 | data = pickle.load(f) 106 | estimated_local_skeleton = data['estimated_local_skeleton'] 107 | gt_skeleton = data['gt_global_skeleton'] 108 | cv2world_mat_list = data['camera_pose_list'] 109 | 110 | self.estimated_local_pose_list = [] 111 | self.gt_pose_list = [] 112 | self.cam_list = [] 113 | # split to 5-frame sequence 114 | for i in tqdm(range(0, len(estimated_local_skeleton) - frame_num, frame_num)): 115 | estimated_seq = estimated_local_skeleton[i: i + self.frame_num] 116 | cam_seq = cv2world_mat_list[i: i + self.frame_num] 117 | gt_seq = gt_skeleton[i: i + self.frame_num] 118 | self.estimated_local_pose_list.append(np.asarray(estimated_seq)) 119 | self.gt_pose_list.append(np.asarray(gt_seq)) 120 | self.cam_list.append(np.asarray(cam_seq)) 121 | 122 | self.slide_window = False 123 | 124 | self.relative_global_pose_list = self.get_relative_global_pose_list(self.estimated_local_pose_list, 125 | self.cam_list) 126 | 127 | print(len(self.relative_global_pose_list)) 128 | 129 | def get_relative_global_pose_list(self, estimated_local_pose_list, cam_list): 130 | relative_global_pose_seq_list = [] 131 | 132 | for local_pose_seq, camera_seq in tqdm(zip(estimated_local_pose_list, cam_list)): 133 | relative_global_pose_seq = get_relative_global_pose_with_camera_matrix(local_pose_seq, camera_seq) 134 | 135 | # select {frame_num} pose from relative global pose list 136 | final_relative_global_pose_seq = [] 137 | for i in range(0, len(relative_global_pose_seq)): 138 | final_relative_global_pose_seq.append(relative_global_pose_seq[i]) 139 | 140 | relative_global_pose_seq_list.append(final_relative_global_pose_seq) 141 | 142 | return np.asarray(relative_global_pose_seq_list) 143 | 144 | def __getitem__(self, index): 145 | out_pose = self.relative_global_pose_list[index].reshape([-1, 45]) 146 | out_pose = torch.from_numpy(out_pose).float() 147 | 148 | out_cam = self.cam_list[index] 149 | out_cam = torch.from_numpy(out_cam).float() 150 
| 151 | out_gt = self.gt_pose_list[index] 152 | out_gt = torch.from_numpy(out_gt).float() 153 | return out_pose, out_cam, out_gt 154 | 155 | def __len__(self): 156 | return len(self.relative_global_pose_list) 157 | 158 | 159 | if __name__ == '__main__': 160 | 161 | dataset = AMASSDataset(data_path='/home/jianwang/ScanNet/static00/EgocentricAMASS', 162 | frame_num=10, windows_size=1, 163 | is_train=True) 164 | print(len(dataset)) 165 | 166 | # dataset = Mo2Cap2Dataset(pkl_path='../test_data_weipeng_studio/data_start_887_end_987/test_data.pkl') 167 | # from utils.skeleton import Skeleton 168 | # 169 | # skeleton_model = Skeleton(calibration_path=None) 170 | # d = dataset.relative_global_pose_list[0] 171 | # skeleton_list = skeleton_model.joint_list_2_mesh_list(d) 172 | # open3d.visualization.draw_geometries(skeleton_list) 173 | -------------------------------------------------------------------------------- /networks/train_local_global.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('..') 4 | import torch 5 | import numpy as np 6 | 7 | from config import args 8 | import datetime 9 | import os 10 | from torch.utils.data import DataLoader 11 | from tqdm import tqdm 12 | from torch.utils.tensorboard import SummaryWriter 13 | from torch.optim import Adam 14 | from pprint import pprint 15 | from utils.rigid_transform_with_scale import umeyama 16 | # from SeqConvVAE import ConvVAE 17 | # from SeqCCVAE import ConvVAE 18 | from models.LocalGlobalSeqVAE import LocalGlobalSeqVAE 19 | from models.SeqCVAE_attention import AttentionVAE 20 | from models.RecurrentVAE import RecurrentVAE 21 | 22 | 23 | def prepare_log_dir(log_dir=None): 24 | # prepare log dirs 25 | if args.log_dir is None and log_dir is None: 26 | log_dir = datetime.datetime.now().strftime('%m.%d-%H:%M:%S') 27 | elif log_dir is None: 28 | log_dir = args.log_dir 29 | log_dir = os.path.join('logs', log_dir) 30 | print('making save dir at: {}'.format(log_dir)) 31 | os.makedirs(log_dir) 32 | checkpoints_dir = os.path.join(log_dir, 'checkpoints') 33 | tensorboard_dir = os.path.join(log_dir, 'tensorboard') 34 | os.makedirs(checkpoints_dir) 35 | os.makedirs(tensorboard_dir) 36 | return checkpoints_dir, tensorboard_dir 37 | 38 | 39 | class Train: 40 | def __init__(self): 41 | if args.new_dataset is True: 42 | from dataset import AMASSDataset 43 | else: 44 | from old_dataset import AMASSDataset 45 | self.seq_length = args.seq_length 46 | self.attention = args.attention 47 | 48 | self.amass_train_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=True, 49 | windows_size=args.slide_window_step, balance_distrib=args.data_balance) 50 | self.train_dataloader = DataLoader(dataset=self.amass_train_dataset, batch_size=args.batch_size, shuffle=True, 51 | drop_last=True, num_workers=args.num_workers) 52 | 53 | self.test_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=False, 54 | windows_size=args.slide_window_step, balance_distrib=args.data_balance) 55 | self.test_dataloader = DataLoader(self.test_dataset, batch_size=args.batch_size, shuffle=False, 56 | drop_last=False, num_workers=args.num_workers) 57 | # self.network = AttentionVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 58 | # seq_len=self.seq_length, attention_type=self.attention) 59 | self.network = LocalGlobalSeqVAE(in_channels=45, out_channels=45, local_vae_latent_dim=args.latent_dim, 60 | 
global_vae_latent_dim=args.latent_dim, seq_len=self.seq_length) 61 | # self.network = RecurrentVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, seq_len=self.seq_length) 62 | 63 | self.optimizer = Adam(params=self.network.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay) 64 | # self.l2_loss = torch.nn.MSELoss(reduction='elementwise_mean') 65 | # self.l2_loss = torch.nn.MSELoss() 66 | 67 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 68 | 69 | self.network = self.network.to(self.device) 70 | 71 | self.checkpoint_dir, tensorboard_dir = prepare_log_dir() 72 | self.tensorboard_writer = SummaryWriter(tensorboard_dir) 73 | 74 | def train(self): 75 | print('---------------------Start Training-----------------------') 76 | pprint(args.__dict__) 77 | running_loss = 0 78 | running_local_recon_loss = 0 79 | running_global_recon_loss = 0 80 | count = 0 81 | for e in range(args.epoch): 82 | self.network.train() 83 | print('-----------------Epoch: {}-----------------'.format(str(e))) 84 | for i, (relative_global_pose, local_pose, camera_matrix) in tqdm(enumerate(self.train_dataloader)): 85 | self.optimizer.zero_grad() 86 | # relative_global_pose = relative_global_pose.to(self.device) 87 | local_pose = local_pose.to(self.device) 88 | camera_matrix = camera_matrix.to(self.device) 89 | 90 | local_pose_out, local_pose_input, mu_local, log_var_local, \ 91 | global_pose_out, global_pose_input, mu_global, log_var_global = self.network(local_pose, camera_matrix) 92 | loss, other_data = \ 93 | self.network.loss_function(local_pose_out, local_pose_input, mu_local, log_var_local, 94 | args.kl_weight * 64 / len(self.amass_train_dataset), 95 | global_pose_out, global_pose_input, mu_global, log_var_global, 96 | args.kl_weight * 64 / len(self.amass_train_dataset)) 97 | loss.backward() 98 | self.optimizer.step() 99 | 100 | 101 | 102 | running_loss += loss.item() 103 | running_local_recon_loss += other_data[1].item() 104 | running_global_recon_loss += other_data[4].item() 105 | # logs 106 | if count % args.log_step == 0 and count != 0: 107 | # ...log the running loss 108 | self.tensorboard_writer.add_scalar('training loss', running_loss, count) 109 | self.tensorboard_writer.add_scalar('training local recon loss', running_local_recon_loss, count) 110 | self.tensorboard_writer.add_scalar('training global recon loss', running_global_recon_loss, count) 111 | print("running loss is: {}".format(running_loss)) 112 | print("running local recon loss is: {}".format(running_local_recon_loss)) 113 | print("running global recon loss is: {}".format(running_global_recon_loss)) 114 | running_loss = 0 115 | running_local_recon_loss = 0 116 | running_global_recon_loss = 0 117 | count += 1 118 | # log the logs 119 | eval_loss = self.eval() 120 | self.tensorboard_writer.add_scalar('eval loss', eval_loss, e) 121 | print('eval loss is: {}'.format(eval_loss)) 122 | 123 | torch.save({ 124 | 'epoch': e + 1, 125 | 'args': args.__dict__, 126 | 'state_dict': self.network.state_dict(), 127 | 'eval_result': eval_loss, 128 | 'optimizer': self.optimizer.state_dict(), 129 | }, os.path.join(self.checkpoint_dir, str(e) + '.pth.tar')) 130 | 131 | def eval(self): 132 | print('---------------------Start Eval-----------------------') 133 | self.network.eval() 134 | mpjpe_list = [] 135 | with torch.no_grad(): 136 | for i, (relative_global_pose, local_pose, camera_matrix) in tqdm(enumerate(self.test_dataloader)): 137 | relative_global_pose = relative_global_pose.to(self.device) 138 | 
local_pose = local_pose.to(self.device) 139 | camera_matrix = camera_matrix.to(self.device) 140 | 141 | local_pose_preds, local_pose_input, mu_local, log_var_local, \ 142 | global_pose_preds, global_pose_input, mu_global, log_var_global = self.network(local_pose, camera_matrix) 143 | 144 | global_pose_preds = global_pose_preds.cpu().numpy() 145 | global_pose_input = global_pose_input.cpu().numpy() 146 | relative_global_pose = np.reshape(relative_global_pose, [-1, self.seq_length, 15, 3]) 147 | pose_gt = np.reshape(relative_global_pose, [-1, self.seq_length, 15, 3]) 148 | mpjpe_list.append(self.mpjpe(global_pose_preds, pose_gt)) 149 | 150 | print('MPJPE is: {}'.format(np.mean(mpjpe_list))) 151 | return np.mean(mpjpe_list) 152 | 153 | def mpjpe(self, pose_preds, pose_gt): 154 | distance = np.linalg.norm(pose_gt - pose_preds, axis=3) 155 | return np.mean(distance) 156 | 157 | 158 | if __name__ == '__main__': 159 | train = Train() 160 | train.train() 161 | -------------------------------------------------------------------------------- /MakeDataForOptimization/slam_reader.py: -------------------------------------------------------------------------------- 1 | # read slam results 2 | import open3d 3 | from collections import OrderedDict 4 | import numpy as np 5 | from scipy.spatial.transform import Rotation 6 | from utils.rigid_transform_with_scale import umeyama 7 | from copy import deepcopy 8 | from utils.skeleton import Skeleton 9 | 10 | 11 | class SLAMReader: 12 | def __init__(self, fps=30): 13 | self.fps = fps 14 | self.skeleton_model = Skeleton(calibration_path=None) 15 | pass 16 | 17 | def trans_qrot_to_matrix(self, trans, rot): 18 | # rot is quat 19 | rot_mat = Rotation.from_quat(rot).as_matrix() 20 | mat = np.array([ 21 | np.concatenate([rot_mat[0], [trans[0]]]), 22 | np.concatenate([rot_mat[1], [trans[1]]]), 23 | np.concatenate([rot_mat[2], [trans[2]]]), 24 | [0, 0, 0, 1] 25 | ]) 26 | return mat 27 | 28 | def trans_rotmat_to_matrix(self, trans, rot): 29 | # rot is matrix 30 | mat = np.array([ 31 | np.concatenate([rot[0], [trans[0]]]), 32 | np.concatenate([rot[1], [trans[1]]]), 33 | np.concatenate([rot[2], [trans[2]]]), 34 | [0, 0, 0, 1] 35 | ]) 36 | return mat 37 | 38 | def visualize_traj(self, slam_traj_mat, gt_traj_mat): 39 | slam_point_cloud = open3d.geometry.PointCloud() 40 | slam_point_cloud.points = open3d.utility.Vector3dVector(slam_traj_mat) 41 | slam_point_cloud.paint_uniform_color([1, 0, 0]) 42 | gt_pointcloud = open3d.geometry.PointCloud() 43 | gt_pointcloud.points = open3d.utility.Vector3dVector(gt_traj_mat) 44 | gt_pointcloud.paint_uniform_color([0, 1, 0]) 45 | 46 | # mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame() 47 | 48 | open3d.visualization.draw_geometries([gt_pointcloud, slam_point_cloud]) 49 | 50 | def read_trajectory_new(self, trajectory_path, local_pose_list, gt_global_pose, start_frame, end_frame): 51 | with open(trajectory_path, 'r') as f: 52 | frame_lines = f.readlines() 53 | 54 | # mat_trajectory = OrderedDict() 55 | # trans_and_rot = OrderedDict() 56 | mat_trajectory = [] 57 | trans_and_rot = [] 58 | for frame in frame_lines: 59 | frame_info = frame.strip().split() 60 | frame_id = round(float(frame_info[0]) * self.fps) 61 | trans = np.array(frame_info[1:4]).astype(np.float) 62 | rot = np.array(frame_info[4:]).astype(np.float) 63 | if start_frame <= frame_id < end_frame: 64 | trans_and_rot.append({"loc": trans, 'rot': rot}) 65 | 66 | # trans and rot to relative trans and rot 67 | 68 | trans_and_rot = 
self.get_relative_camera_pose_list(trans_and_rot) 69 | 70 | gt_global_skeleton_list = np.asarray(gt_global_pose) 71 | 72 | 73 | # also align bone length 74 | # for i in range(len(gt_global_pose)): 75 | # local_pose = local_pose_list[i] 76 | # global_pose = gt_global_pose[i] 77 | # c_p, R_p, t_p = umeyama(local_pose, global_pose) 78 | # local_pose_list[i] *= c_p 79 | 80 | 81 | gt_head_locs = gt_global_skeleton_list[:, 0] 82 | 83 | 84 | slam_traj_list = [] 85 | gt_traj_list = [] 86 | for i in range(len(trans_and_rot)): 87 | slam_i_trans = trans_and_rot[i]['loc'] 88 | slam_i_rot = trans_and_rot[i]['rot'] 89 | 90 | # get head locs from SLAM 91 | slam_coord = self.trans_qrot_to_matrix(slam_i_trans, slam_i_rot) 92 | local_skeleton = local_pose_list[i] 93 | local_skeleton_points = open3d.geometry.PointCloud() 94 | local_skeleton_points.points = open3d.utility.Vector3dVector(local_skeleton) 95 | local_skeleton_points = local_skeleton_points.transform(slam_coord) 96 | global_skeleton = np.asarray(local_skeleton_points.points) 97 | 98 | slam_traj_list.append(global_skeleton[0]) 99 | gt_traj_list.append(gt_head_locs[i]) 100 | slam_traj_mat = np.vstack(slam_traj_list) 101 | gt_traj_mat = np.vstack(gt_traj_list) 102 | 103 | c, R, t = umeyama(slam_traj_mat, gt_traj_mat) 104 | c_1, R_1, t_1 = umeyama(gt_traj_mat, slam_traj_mat) 105 | 106 | # self.visualize_traj(slam_traj_mat.dot(R) * c + t, gt_traj_mat) 107 | 108 | # self.show_global_pose_sequence_and_slam_trajectory(global_skeleton_list, slam_traj_mat) 109 | 110 | for i in range(len(trans_and_rot)): 111 | trans = trans_and_rot[i]['loc'] 112 | rot = trans_and_rot[i]['rot'] 113 | trans = trans * c 114 | 115 | transform_mat = self.trans_qrot_to_matrix(trans, rot) 116 | 117 | mat_trajectory.append(transform_mat) 118 | 119 | 120 | 121 | return mat_trajectory, R_1, t_1 122 | 123 | def show_global_pose_sequence_and_slam_trajectory(self, global_skeleton_list, slam_traj_mat): 124 | global_mesh = [] 125 | for skeleton in global_skeleton_list: 126 | global_mesh.append(self.skeleton_model.joints_2_mesh(skeleton)) 127 | slam_point_cloud = open3d.geometry.PointCloud() 128 | slam_point_cloud.points = open3d.utility.Vector3dVector(slam_traj_mat) 129 | slam_point_cloud.paint_uniform_color([1, 0, 0]) 130 | 131 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame() 132 | 133 | open3d.visualization.draw_geometries([slam_point_cloud, mesh_frame] + global_mesh) 134 | 135 | def trans_qrot_to_matrix(self, trans, rot): 136 | # rot is quat 137 | rot_mat = Rotation.from_quat(rot).as_matrix() 138 | mat = np.array([ 139 | np.concatenate([rot_mat[0], [trans[0]]]), 140 | np.concatenate([rot_mat[1], [trans[1]]]), 141 | np.concatenate([rot_mat[2], [trans[2]]]), 142 | [0, 0, 0, 1] 143 | ]) 144 | return mat 145 | 146 | def matrix_to_trans_qrot(self, matrix): 147 | translation = np.asarray([matrix[0][3], matrix[1][3], matrix[2][3]]) 148 | 149 | rotation_matrix = np.asarray([matrix[0][:3], matrix[1][:3], matrix[2][:3]]) 150 | 151 | qrot = Rotation.from_matrix(rotation_matrix).as_quat() 152 | 153 | return translation, qrot 154 | 155 | def get_relative_camera_pose_list(self, camera_pose_list): 156 | # firstly get relative camera pose list 157 | relative_camera_pose_list = [] 158 | camera_pose_0 = deepcopy(camera_pose_list[0]) 159 | camera_matrix_0 = self.trans_qrot_to_matrix(camera_pose_0['loc'], camera_pose_0['rot']) 160 | camera_matrix_0_inv = np.linalg.inv(camera_matrix_0) 161 | for i, camera_pose_i in enumerate(camera_pose_list): 162 | camera_matrix_i = 
self.trans_qrot_to_matrix(camera_pose_i['loc'], camera_pose_i['rot']) 163 | camera_matrix_i_to_0 = camera_matrix_0_inv.dot(camera_matrix_i) 164 | trans, qrot = self.matrix_to_trans_qrot(camera_matrix_i_to_0) 165 | relative_camera_pose_list.append({'loc': trans, 166 | 'rot': qrot}) 167 | return relative_camera_pose_list 168 | 169 | def read_trajectory(self, trajectory_path, start_frame, end_frame, scale=1): 170 | with open(trajectory_path, 'r') as f: 171 | frame_lines = f.readlines() 172 | 173 | # mat_trajectory = OrderedDict() 174 | # trans_and_rot = OrderedDict() 175 | mat_trajectory = [] 176 | trans_and_rot = [] 177 | for frame in frame_lines: 178 | frame_info = frame.strip().split() 179 | frame_id = round(float(frame_info[0]) * self.fps) 180 | trans = np.array(frame_info[1:4]).astype(np.float) 181 | rot = np.array(frame_info[4:]).astype(np.float) 182 | if start_frame <= frame_id < end_frame: 183 | trans_and_rot.append({"loc": trans, 'rot': rot}) 184 | 185 | # trans and rot to relative trans and rot 186 | 187 | trans_and_rot = self.get_relative_camera_pose_list(trans_and_rot) 188 | 189 | 190 | slam_traj_list = [] 191 | for i in range(len(trans_and_rot)): 192 | trans = trans_and_rot[i]['loc'] 193 | rot = trans_and_rot[i]['rot'] 194 | trans = trans * scale 195 | 196 | transform_mat = self.trans_qrot_to_matrix(trans, rot) 197 | 198 | mat_trajectory.append(transform_mat) 199 | 200 | return mat_trajectory 201 | -------------------------------------------------------------------------------- /networks/dataset/global_dataset.py: -------------------------------------------------------------------------------- 1 | import open3d 2 | import torch 3 | from torch.utils.data.dataset import Dataset 4 | from torch.utils.data.dataloader import DataLoader 5 | from tqdm import tqdm 6 | import pickle 7 | import os 8 | from utils.utils import get_relative_global_pose, get_relative_global_pose_with_camera_matrix 9 | import numpy as np 10 | 11 | 12 | class AMASSDataset(Dataset): 13 | def __init__(self, data_path, frame_num, windows_size=1, is_train=True, fps=25, slide_window=True, 14 | balance_distrib=False, with_mo2cap2_data=False): 15 | """ 16 | 17 | :param data_path: directory with pkls containing data 18 | :param frame_num: length of sequence 19 | :param windows_size: one frame from n frame 20 | :param is_train: 21 | :param fps: the fps of test data 22 | """ 23 | self.windows_size = windows_size 24 | self.is_train = is_train 25 | self.slide_window = slide_window 26 | self.frame_num = frame_num 27 | self.balance_distrib = balance_distrib 28 | self.with_mo2cap2_data = with_mo2cap2_data 29 | self.data_list = self.load_pkls(data_path, is_train, self.with_mo2cap2_data, self.balance_distrib) 30 | self.relative_pose_seq_list = self.get_relative_global_pose_list(self.data_list, frame_num=self.frame_num, 31 | windows_size=self.windows_size, 32 | fps=fps) 33 | 34 | def __getitem__(self, i): 35 | data_i = self.relative_pose_seq_list[i] 36 | data_i = data_i.reshape((-1, 45)) 37 | data_i = torch.from_numpy(data_i).float() 38 | return data_i 39 | 40 | def __len__(self): 41 | return len(self.relative_pose_seq_list) 42 | 43 | def load_pkls(self, path, is_train, with_mo2cap2_data=False, balance_distrib=False): 44 | data_list = [] 45 | raw_pkl_path_list = os.listdir(path) 46 | if with_mo2cap2_data is True: 47 | print('use only mo2cap2 motion!') 48 | seq_names = np.load('/HPS/ScanNet/static00/SURREAL/smpl_data/seq_names.npy').tolist() 49 | pkl_path_list = [] 50 | for seq_name in seq_names: 51 | for pkl_path in 
raw_pkl_path_list: 52 | if seq_name in pkl_path: 53 | pkl_path_list.append(pkl_path) 54 | else: 55 | pkl_path_list = raw_pkl_path_list 56 | print(len(pkl_path_list)) 57 | if is_train: 58 | pkl_path_list = pkl_path_list[:-10] 59 | else: 60 | pkl_path_list = pkl_path_list[-10:] 61 | 62 | if balance_distrib is True: 63 | print('use balanced distribution') 64 | walking_pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'walk' in pkl_path.lower()] 65 | non_walking_pkl_path_list = [pkl_path for pkl_path in pkl_path_list if 'walk' not in pkl_path.lower()] 66 | np.random.shuffle(walking_pkl_path_list) 67 | walking_num = int(1 / 20 * len(non_walking_pkl_path_list)) 68 | pkl_path_list = non_walking_pkl_path_list + walking_pkl_path_list[:walking_num] 69 | for file_name in tqdm(pkl_path_list): 70 | file_path = os.path.join(path, file_name) 71 | with open(file_path, 'rb') as f: 72 | data = pickle.load(f) 73 | data_list.append(data) 74 | return data_list 75 | 76 | def get_local_poses(self, data_list): 77 | local_pose_list = [] 78 | for data in data_list: 79 | local_pose_list.extend(data['local_pose_list']) 80 | return local_pose_list 81 | 82 | def get_relative_global_pose_list(self, data_list, frame_num=5, windows_size=5, fps=30): 83 | total_frame_number = frame_num * windows_size 84 | relative_global_pose_seq_list = [] 85 | 86 | local_pose_seq_list = [] 87 | camera_seq_list = [] 88 | for seq_data in data_list: 89 | seq_len = len(seq_data['local_pose_list']) 90 | data_frame_rate = int(seq_data['frame_rate']) 91 | frame_rate_timer = round(data_frame_rate / fps) 92 | if self.slide_window is True: 93 | interval = 1 94 | else: 95 | interval = total_frame_number * frame_rate_timer 96 | for i in range(0, seq_len - total_frame_number * frame_rate_timer, interval): 97 | local_pose_seq_list.append( 98 | seq_data['local_pose_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 99 | camera_seq_list.append( 100 | seq_data['cam_list'][i: i + total_frame_number * frame_rate_timer: frame_rate_timer]) 101 | for local_pose_seq, camera_seq in tqdm(zip(local_pose_seq_list, camera_seq_list)): 102 | relative_global_pose_seq = get_relative_global_pose(local_pose_seq, camera_seq) 103 | 104 | # select {frame_num} pose from relative global pose list 105 | final_relative_global_pose_seq = [] 106 | for i in range(0, len(relative_global_pose_seq), windows_size): 107 | final_relative_global_pose_seq.append(relative_global_pose_seq[i]) 108 | 109 | relative_global_pose_seq_list.append(final_relative_global_pose_seq) 110 | 111 | return np.asarray(relative_global_pose_seq_list) 112 | 113 | 114 | class Mo2Cap2Dataset(Dataset): 115 | def __init__(self, pkl_path, frame_num=5, slide_window=False): 116 | self.frame_num = frame_num 117 | with open(pkl_path, 'rb') as f: 118 | data = pickle.load(f) 119 | estimated_local_skeleton = data['estimated_local_skeleton'] 120 | gt_skeleton = data['gt_global_skeleton'] 121 | cv2world_mat_list = data['camera_pose_list'] 122 | 123 | self.estimated_local_pose_list = [] 124 | self.gt_pose_list = [] 125 | self.cam_list = [] 126 | # split to 5-frame sequence 127 | for i in tqdm(range(0, len(estimated_local_skeleton) - frame_num, frame_num)): 128 | estimated_seq = estimated_local_skeleton[i: i + self.frame_num] 129 | cam_seq = cv2world_mat_list[i: i + self.frame_num] 130 | gt_seq = gt_skeleton[i: i + self.frame_num] 131 | self.estimated_local_pose_list.append(np.asarray(estimated_seq)) 132 | self.gt_pose_list.append(np.asarray(gt_seq)) 133 | self.cam_list.append(np.asarray(cam_seq)) 
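# --- illustrative note (not part of the original file) -----------------------
# The chunks collected above pair each window of estimated local (camera-frame)
# poses with its 4x4 camera-to-world matrices taken from the SLAM trajectory.
# A minimal sketch of how one local pose could be lifted into world coordinates
# with such a matrix is given below; `lift_local_pose_to_world` is a hypothetical
# helper used only for illustration. The actual conversion in this repository is
# done by get_relative_global_pose_with_camera_matrix (imported from utils.utils),
# which, judging by its name, additionally expresses every frame relative to the
# first camera of the window.
def lift_local_pose_to_world(local_pose, cam2world):
    # local_pose: (15, 3) joint positions in camera coordinates
    # cam2world: (4, 4) homogeneous camera-to-world transform
    homo = np.concatenate([local_pose, np.ones((local_pose.shape[0], 1))], axis=1)
    return cam2world.dot(homo.T).T[:, :3]
# ------------------------------------------------------------------------------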
134 | 135 | self.slide_window = False 136 | 137 | self.relative_global_pose_list = self.get_relative_global_pose_list(self.estimated_local_pose_list, 138 | self.cam_list) 139 | 140 | print(len(self.relative_global_pose_list)) 141 | 142 | def get_relative_global_pose_list(self, estimated_local_pose_list, cam_list): 143 | relative_global_pose_seq_list = [] 144 | 145 | for local_pose_seq, camera_seq in tqdm(zip(estimated_local_pose_list, cam_list)): 146 | relative_global_pose_seq = get_relative_global_pose_with_camera_matrix(local_pose_seq, camera_seq) 147 | 148 | # select {frame_num} pose from relative global pose list 149 | final_relative_global_pose_seq = [] 150 | for i in range(0, len(relative_global_pose_seq)): 151 | final_relative_global_pose_seq.append(relative_global_pose_seq[i]) 152 | 153 | relative_global_pose_seq_list.append(final_relative_global_pose_seq) 154 | 155 | return np.asarray(relative_global_pose_seq_list) 156 | 157 | def __getitem__(self, index): 158 | out_pose = self.relative_global_pose_list[index].reshape([-1, 45]) 159 | out_pose = torch.from_numpy(out_pose).float() 160 | 161 | out_cam = self.cam_list[index] 162 | out_cam = torch.from_numpy(out_cam).float() 163 | 164 | out_gt = self.gt_pose_list[index] 165 | out_gt = torch.from_numpy(out_gt).float() 166 | return out_pose, out_cam, out_gt 167 | 168 | def __len__(self): 169 | return len(self.relative_global_pose_list) 170 | 171 | 172 | if __name__ == '__main__': 173 | # dataset = AMASSDataset(data_path='/home/wangjian/Develop/BodyPoseOptimization/AMASSDataConverter/pkls', 174 | # frame_num=5, windows_size=5, 175 | # is_train=False) 176 | # d = dataset[0] 177 | # print(d.shape) 178 | 179 | dataset = Mo2Cap2Dataset(pkl_path='../test_data_weipeng_studio/data_start_887_end_987/test_data.pkl') 180 | from utils.skeleton import Skeleton 181 | 182 | skeleton_model = Skeleton(calibration_path=None) 183 | d = dataset.relative_global_pose_list[0] 184 | skeleton_list = skeleton_model.joint_list_2_mesh_list(d) 185 | open3d.visualization.draw_geometries(skeleton_list) 186 | -------------------------------------------------------------------------------- /calculate_errors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from utils.rigid_transform_with_scale import umeyama 4 | from utils.skeleton import Skeleton 5 | from copy import deepcopy 6 | 7 | 8 | def global_align_skeleton_seq(estimated_seq, gt_seq): 9 | estimated_seq = deepcopy(estimated_seq) 10 | gt_seq = deepcopy(gt_seq) 11 | estimated_seq = np.asarray(estimated_seq).reshape((-1, 3)) 12 | gt_seq = np.asarray(gt_seq).reshape((-1, 3)) 13 | # aligned_pose_list = np.zeros_like(estimated_seq) 14 | # for s in range(estimated_seq.shape[0]): 15 | # pose_p = estimated_seq[s] 16 | # pose_gt_bs = gt_seq[s] 17 | c, R, t = umeyama(estimated_seq, gt_seq) 18 | pose_p = estimated_seq.dot(R) * c + t 19 | # aligned_pose_list[s] = pose_p 20 | 21 | return pose_p.reshape((-1, 15, 3)) 22 | 23 | 24 | def calculate_error(estimated_seq, gt_seq): 25 | estimated_seq = np.asarray(estimated_seq) 26 | gt_seq = np.asarray(gt_seq) 27 | distance = estimated_seq - gt_seq 28 | distance = np.linalg.norm(distance, axis=-1) 29 | m_distance = np.mean(distance) 30 | return m_distance 31 | 32 | 33 | def calculate_slam_error(estimated_seq, gt_seq, align=False): 34 | # seq shape: n_seq, 15, 3 35 | estimated_seq = np.asarray(estimated_seq) 36 | gt_seq = np.asarray(gt_seq) 37 | estimated_root_seq = (estimated_seq[:, 7, :] + estimated_seq[:, 11, :]) / 2 38 
| gt_root_seq = (gt_seq[:, 7, :] + gt_seq[:, 11, :]) / 2 39 | 40 | if align is True: 41 | c, R, t = umeyama(estimated_root_seq, gt_root_seq) 42 | estimated_root_seq = estimated_root_seq.dot(R) * c + t 43 | 44 | distance = estimated_root_seq - gt_root_seq 45 | distance = np.linalg.norm(distance, axis=1) 46 | m_distance = np.mean(distance) 47 | return m_distance 48 | 49 | def align_skeleton_size(estimated_seq, gt_seq): 50 | estimated_seq = deepcopy(np.asarray(estimated_seq)) 51 | gt_seq = deepcopy(np.asarray(gt_seq)) 52 | aligned_pose_list = np.zeros_like(estimated_seq) 53 | for s in range(estimated_seq.shape[0]): 54 | pose_p = estimated_seq[s] 55 | pose_gt_bs = gt_seq[s] 56 | c, R, t = umeyama(pose_p, pose_gt_bs) 57 | pose_p = pose_p * c 58 | aligned_pose_list[s] = pose_p 59 | 60 | return aligned_pose_list 61 | 62 | def align_skeleton(estimated_seq, gt_seq, skeleton_model=None): 63 | estimated_seq = deepcopy(np.asarray(estimated_seq)) 64 | gt_seq = deepcopy(np.asarray(gt_seq)) 65 | if skeleton_model is not None: 66 | for i in range(len(estimated_seq)): 67 | estimated_seq[i] = skeleton_model.skeleton_resize_single( 68 | estimated_seq[i], 69 | bone_length_file='utils/fisheye/mean3D.mat') 70 | for i in range(len(gt_seq)): 71 | gt_seq[i] = skeleton_model.skeleton_resize_single( 72 | gt_seq[i], 73 | bone_length_file='utils/fisheye/mean3D.mat') 74 | 75 | aligned_pose_list = np.zeros_like(estimated_seq) 76 | for s in range(estimated_seq.shape[0]): 77 | pose_p = estimated_seq[s] 78 | pose_gt_bs = gt_seq[s] 79 | c, R, t = umeyama(pose_p, pose_gt_bs) 80 | pose_p = pose_p.dot(R) * c + t 81 | aligned_pose_list[s] = pose_p 82 | 83 | return aligned_pose_list, gt_seq 84 | 85 | 86 | def align_single_skeleton(estimated_pose, gt_pose, skeleton_model=None): 87 | estimated_pose = deepcopy(np.asarray(estimated_pose)) 88 | gt_pose = deepcopy(np.asarray(gt_pose)) 89 | if skeleton_model is not None: 90 | estimated_pose = skeleton_model.skeleton_resize_single( 91 | estimated_pose, 92 | bone_length_file='utils/fisheye/mean3D.mat') 93 | gt_pose = skeleton_model.skeleton_resize_single( 94 | gt_pose, 95 | bone_length_file='utils/fisheye/mean3D.mat') 96 | 97 | 98 | 99 | c, R, t = umeyama(estimated_pose, gt_pose) 100 | pose_p = estimated_pose.dot(R) * c + t 101 | 102 | return pose_p, gt_pose 103 | 104 | 105 | def calculate_joint_error(estimated_seq, gt_seq): 106 | estimated_seq = np.asarray(estimated_seq) 107 | gt_seq = np.asarray(gt_seq) 108 | distance = estimated_seq - gt_seq 109 | distance = np.linalg.norm(distance, axis=2) 110 | joints_distance = np.mean(distance, axis=0) 111 | return joints_distance 112 | 113 | 114 | def calculate_errors(final_estimated_seq, mid_estimated_seq, final_optimized_seq=None, final_gt_seq=None): 115 | skeleton_model = Skeleton(None) 116 | original_global_mpjpe = calculate_error(final_estimated_seq, final_gt_seq) 117 | mid_global_mpjpe = calculate_error(mid_estimated_seq, final_gt_seq) 118 | optimized_global_mpjpe = calculate_error(final_optimized_seq, final_gt_seq) 119 | 120 | original_camera_pos_error = calculate_slam_error(final_estimated_seq, final_gt_seq) 121 | optimized_camera_pos_error = calculate_slam_error(final_optimized_seq, final_gt_seq) 122 | 123 | 124 | 125 | # align the estimated result and original result 126 | 127 | aligned_estimated_seq_result = global_align_skeleton_seq(final_estimated_seq, final_gt_seq) 128 | aligned_estimated_mid_seq_result = global_align_skeleton_seq(mid_estimated_seq, final_gt_seq) 129 | aligned_optimized_seq_result = 
global_align_skeleton_seq(final_optimized_seq, final_gt_seq) 130 | 131 | original_aligned_camera_pos_error = calculate_slam_error(aligned_estimated_seq_result, final_gt_seq, align=False) 132 | mid_aligned_camera_pose_error = calculate_slam_error(aligned_estimated_mid_seq_result, final_gt_seq, align=False) 133 | optimized_aligned_camera_pos_error = calculate_slam_error(aligned_optimized_seq_result, final_gt_seq, align=False) 134 | 135 | aligned_original_seq_mpjpe = calculate_error(aligned_estimated_seq_result, final_gt_seq) 136 | aligned_mid_seq_mpjpe = calculate_error(aligned_estimated_mid_seq_result, final_gt_seq) 137 | aligned_optimized_seq_mpjpe = calculate_error(aligned_optimized_seq_result, final_gt_seq) 138 | 139 | # align the estimated result and original result 140 | aligned_estimated_result, final_gt_seq = align_skeleton(final_estimated_seq, final_gt_seq, None) 141 | aligned_mid_optimized_result, final_gt_seq = align_skeleton(mid_estimated_seq, final_gt_seq, None) 142 | aligned_optimized_result, final_gt_seq = align_skeleton(final_optimized_seq, final_gt_seq, None) 143 | 144 | aligned_original_mpjpe = calculate_error(aligned_estimated_result, final_gt_seq) 145 | aligned_mid_optimized_mpjpe = calculate_error(aligned_mid_optimized_result, final_gt_seq) 146 | aligned_optimized_mpjpe = calculate_error(aligned_optimized_result, final_gt_seq) 147 | 148 | # align the estimated result and original result 149 | aligned_estimated_result, final_gt_seq = align_skeleton(final_estimated_seq, final_gt_seq, skeleton_model) 150 | aligned_mid_optimized_result, final_gt_seq = align_skeleton(mid_estimated_seq, final_gt_seq, skeleton_model) 151 | aligned_optimized_result, final_gt_seq = align_skeleton(final_optimized_seq, final_gt_seq, skeleton_model) 152 | 153 | bone_length_aligned_original_mpjpe = calculate_error(aligned_estimated_result, final_gt_seq) 154 | bone_length_aligned_mid_optimized_mpjpe = calculate_error(aligned_mid_optimized_result, final_gt_seq) 155 | bone_length_aligned_optimized_mpjpe = calculate_error(aligned_optimized_result, final_gt_seq) 156 | joints_error = calculate_joint_error(aligned_optimized_result, final_gt_seq) 157 | 158 | from collections import OrderedDict 159 | result = OrderedDict({'original_global_mpjpe': original_global_mpjpe, 160 | 'mid_global_mpjpe': mid_global_mpjpe, 161 | 'optimized_global_mpjpe': optimized_global_mpjpe, 162 | 'original_camera_pos_error': original_camera_pos_error, 163 | 'optimized_camera_pos_error': optimized_camera_pos_error, 164 | 165 | 'original_aligned_camera_pos_error': original_aligned_camera_pos_error, 166 | 'mid_aligned_camera_pose_error': mid_aligned_camera_pose_error, 167 | 'optimized_aligned_camera_pos_error': optimized_aligned_camera_pos_error, 168 | 169 | 'original_aligned_global_mpjpe': aligned_original_seq_mpjpe, 170 | "aligned_mid_seq_mpjpe": aligned_mid_seq_mpjpe, 171 | 'optimized_aligned_global_mpjpe': aligned_optimized_seq_mpjpe, 172 | 'aligned_original_mpjpe': aligned_original_mpjpe, 173 | 'aligned_mid_optimized_mpjpe': aligned_mid_optimized_mpjpe, 174 | 'aligned_optimized_mpjpe': aligned_optimized_mpjpe, 175 | 'bone_length_aligned_original_mpjpe': bone_length_aligned_original_mpjpe, 176 | 'bone_length_aligned_mid_optimized_mpjpe': bone_length_aligned_mid_optimized_mpjpe, 177 | 'bone_length_aligned_optimized_mpjpe': bone_length_aligned_optimized_mpjpe, 178 | 'joints_error': joints_error}) 179 | return result 180 | -------------------------------------------------------------------------------- 
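Note on the alignment used above: calculate_errors.py aligns estimated skeletons to ground truth with `c, R, t = umeyama(src, dst)` and applies the fit as `src.dot(R) * c + t`. The repository's implementation lives in utils/rigid_transform_with_scale.py and is not shown in this excerpt; the sketch below is a minimal reimplementation of the standard Umeyama similarity alignment following the same row-vector convention, included only to illustrate what the alignment computes (`umeyama_sketch` is a hypothetical stand-in, not the repository function).

import numpy as np

def umeyama_sketch(src, dst):
    """Return (c, R, t) such that src.dot(R) * c + t best fits dst in least squares.

    src, dst: (N, 3) arrays of corresponding points.
    """
    mu_src, mu_dst = src.mean(axis=0), dst.mean(axis=0)
    src_c, dst_c = src - mu_src, dst - mu_dst
    cov = dst_c.T.dot(src_c) / src.shape[0]        # 3x3 cross-covariance matrix
    U, D, Vt = np.linalg.svd(cov)
    S = np.eye(3)
    if np.linalg.det(U) * np.linalg.det(Vt) < 0:   # guard against reflections
        S[2, 2] = -1.0
    R_col = U.dot(S).dot(Vt)                       # rotation acting on column vectors
    var_src = (src_c ** 2).sum() / src.shape[0]    # variance of the source points
    c = np.trace(np.diag(D).dot(S)) / var_src      # optimal isotropic scale
    t = mu_dst - c * mu_src.dot(R_col.T)
    return c, R_col.T, t                           # transpose -> row-vector convention

# usage mirroring global_align_skeleton_seq / calculate_slam_error above:
# c, R, t = umeyama_sketch(estimated_points, gt_points)
# aligned = estimated_points.dot(R) * c + t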
/MakeDataForOptimization/process_test_data.py: -------------------------------------------------------------------------------- 1 | # 1. slam result + local pose = global pose 2 | # 2. save global pose result 3 | import open3d 4 | from utils.skeleton import Skeleton 5 | import os 6 | from slam_reader import SLAMReader 7 | from scipy.io import loadmat 8 | import numpy as np 9 | from scipy.spatial.transform import Rotation 10 | from tqdm import tqdm 11 | from copy import deepcopy 12 | import pickle 13 | from natsort import natsorted 14 | 15 | 16 | class TestDataPreprocessor: 17 | def __init__(self, slam_result_path, heatmap_dir, depth_dir, gt_path, start_frame, end_frame, fps, 18 | mat_start_frame): 19 | self.heatmap_dir = heatmap_dir 20 | self.depth_dir = depth_dir 21 | self.slam_result_path = slam_result_path 22 | self.slam_reader = SLAMReader(fps=fps) 23 | self.start_frame = start_frame 24 | self.end_frame = end_frame 25 | self.skeleton_model = Skeleton(calibration_path='utils/fisheye/fisheye.calibration.json') 26 | self.gt_global_skeleton = self.load_gt_data(gt_path=gt_path, start_frame=start_frame, end_frame=end_frame, 27 | mat_start_frame=mat_start_frame) 28 | self.local_pose_list, self.heatmap_list = self.get_local_pose(self.heatmap_dir, self.depth_dir, 29 | self.start_frame, self.end_frame) 30 | self.trajectory, self.R, self.t = self.slam_reader.read_trajectory_new(self.slam_result_path, 31 | self.local_pose_list, 32 | self.gt_global_skeleton, 33 | start_frame=start_frame, 34 | end_frame=end_frame) 35 | 36 | 37 | 38 | def load_gt_data(self, gt_path, start_frame, end_frame, mat_start_frame): 39 | with open(gt_path, 'rb') as f: 40 | pose_gt = pickle.load(f) 41 | clip = [] 42 | for i in range(start_frame, end_frame): 43 | clip.append(pose_gt[i - mat_start_frame]) 44 | 45 | skeleton_list = clip 46 | 47 | return skeleton_list 48 | 49 | def get_local_pose(self, heatmaps_dir, depths_dir, start_frame, end_frame): 50 | local_pose_list = [] 51 | heatmap_list = [] 52 | heatmaps_filename_list = natsorted(os.listdir(heatmaps_dir))[start_frame: end_frame] 53 | depths_filename_list = natsorted(os.listdir(depths_dir))[start_frame: end_frame] 54 | 55 | for heatmaps_filename, depths_filename in tqdm(zip(heatmaps_filename_list, depths_filename_list)): 56 | heatmap_path = os.path.join(heatmaps_dir, heatmaps_filename) 57 | depth_path = os.path.join(depths_dir, depths_filename) 58 | 59 | self.skeleton_model.set_skeleton_from_file(heatmap_file=heatmap_path, 60 | depth_file=depth_path, 61 | # bone_length_file='/home/wangjian/Develop/BodyPoseRefiner/utils/fisheye/mean3D.mat', 62 | to_mesh=False) 63 | local_skeleton = self.skeleton_model.skeleton 64 | local_pose_list.append(local_skeleton) 65 | heatmap_mat = loadmat(heatmap_path) 66 | heatmap = heatmap_mat['heatmap'] 67 | heatmap_list.append(heatmap) 68 | return local_pose_list, heatmap_list 69 | 70 | def render_body_sequence(self, visualization=False): 71 | global_skeleton_list = [] 72 | for i in range(len(self.trajectory)): 73 | slam_coord = self.trajectory[i] 74 | local_skeleton = self.local_pose_list[i] 75 | local_skeleton_points = open3d.geometry.PointCloud() 76 | local_skeleton_points.points = open3d.utility.Vector3dVector(local_skeleton) 77 | local_skeleton_points = local_skeleton_points.transform(slam_coord) 78 | # local_skeleton_points = local_skeleton_points.rotate(-Rotation.from_matrix(self.R).as_euler('xyz'), 79 | # center=False) 80 | # local_skeleton_points = local_skeleton_points.translate(-self.t) 81 | global_skeleton = 
np.asarray(local_skeleton_points.points) 82 | global_skeleton_list.append(global_skeleton) 83 | # align head to zero position 84 | # head_position = deepcopy(global_skeleton_list[0][0]) 85 | # for i in range(len(global_skeleton_list)): 86 | # global_skeleton_list[i] -= head_position 87 | if visualization: 88 | mesh_list = [] 89 | for skeleton in global_skeleton_list: 90 | self.skeleton_model.skeleton = skeleton 91 | self.skeleton_model.skeleton_to_mesh() 92 | mesh_list.append(self.skeleton_model.skeleton_mesh) 93 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=1) 94 | open3d.visualization.draw_geometries(mesh_list + [mesh_frame]) 95 | 96 | return global_skeleton_list, mesh_list 97 | else: 98 | return global_skeleton_list, [] 99 | 100 | def rotate_gt_global_skeleton(self, gt_skeleton_list, R, t): 101 | result_gt_skeleton_list = [] 102 | for i in range(len(gt_skeleton_list)): 103 | gt_skeleton = gt_skeleton_list[i] 104 | gt_skeleton_points = open3d.geometry.PointCloud() 105 | gt_skeleton_points.points = open3d.utility.Vector3dVector(gt_skeleton) 106 | gt_skeleton_points = gt_skeleton_points.rotate(-Rotation.from_matrix(R).as_euler('xyz'), 107 | center=False) 108 | gt_skeleton_points = gt_skeleton_points.translate(t) 109 | gt_skeleton = np.asarray(gt_skeleton_points.points) 110 | result_gt_skeleton_list.append(gt_skeleton) 111 | return result_gt_skeleton_list 112 | 113 | def align_estimated_and_gt_skeleton(self, estimated_skeleton_list, gt_skeleton_list): 114 | estimated_skeleton_head_position = estimated_skeleton_list[0][0] 115 | gt_head_position = gt_skeleton_list[0][0] 116 | 117 | distance = estimated_skeleton_head_position - gt_head_position 118 | 119 | distance = deepcopy(distance) 120 | for gt_pose in gt_skeleton_list: 121 | gt_pose += distance 122 | return gt_skeleton_list 123 | 124 | 125 | def main(slam_result_path, heatmap_dir, depth_dir, gt_path, 126 | start_frame, end_frame, out_dir, fps, mat_start_frame): 127 | visualizer = TestDataPreprocessor( 128 | slam_result_path=slam_result_path, 129 | heatmap_dir=heatmap_dir, 130 | depth_dir=depth_dir, 131 | gt_path=gt_path, 132 | start_frame=start_frame, 133 | end_frame=end_frame, 134 | fps=fps, 135 | mat_start_frame=mat_start_frame) 136 | 137 | # out_dir = 'corrected_data/studio-lingjie1_visualization/{}'.format(out_data_id) 138 | if not os.path.isdir(out_dir): 139 | os.makedirs(out_dir) 140 | out_path = os.path.join(out_dir, 'test_data.pkl') 141 | 142 | local_estimated_skeleton_list = visualizer.local_pose_list 143 | heatmap_list = visualizer.heatmap_list 144 | slam_result_list = visualizer.trajectory 145 | 146 | estimated_skeleton_list, estimated_mesh_list = visualizer.render_body_sequence(visualization=False) 147 | rotated_gt_global_pose_list = visualizer.gt_global_skeleton 148 | 149 | out = { 150 | "gt_global_skeleton": rotated_gt_global_pose_list, 151 | 'estimated_global_skeleton': estimated_skeleton_list, 152 | 'estimated_local_skeleton': local_estimated_skeleton_list, 153 | 'camera_pose_list': slam_result_list, 154 | "heatmap_list":heatmap_list 155 | } 156 | with open(out_path, 'wb') as f: 157 | pickle.dump(out, f) 158 | 159 | mpjpe = [] 160 | for i in range(len(rotated_gt_global_pose_list)): 161 | distance = rotated_gt_global_pose_list[i] - estimated_skeleton_list[i] 162 | distance = np.linalg.norm(distance, axis=1) 163 | mpjpe.append(np.mean(distance)) 164 | print("The initial mpjpe is: {}".format(np.mean(mpjpe))) 165 | 166 | 167 | if __name__ == '__main__': 168 | slam_result_path = 
r'data/studio-lingjie1/frame_trajectory.txt', 169 | heatmap_dir = r'\\winfs-inf\HPS\Mo2Cap2Plus1\static00\EgocentricData\REC23102020\studio-lingjie1\heatmaps', 170 | depth_dir = r'\\winfs-inf\HPS\Mo2Cap2Plus1\static00\EgocentricData\REC23102020\studio-lingjie1\depths', 171 | gt_path = 'data/studio-lingjie1/lingjie1.pkl', 172 | 173 | total_start_frame = 551 174 | total_end_frame = 3300 175 | 176 | 177 | test_size = 100 178 | for i in range(total_start_frame, total_end_frame - test_size, test_size): 179 | print("running test sequence from {} to {}".format(i, i + test_size)) 180 | start_frame = i 181 | end_frame = i + test_size 182 | out_dir = 'corrected_data/studio-lingjie1_visualization/data_start_{}_end_{}'.format(start_frame, end_frame) 183 | main(slam_result_path, heatmap_dir, depth_dir, gt_path, 184 | start_frame, end_frame, out_dir, fps=25, mat_start_frame=total_start_frame) 185 | -------------------------------------------------------------------------------- /networks/train_local.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('..') 4 | import torch 5 | import numpy as np 6 | 7 | from config import args 8 | import datetime 9 | import os 10 | from torch.utils.data import DataLoader 11 | from tqdm import tqdm 12 | from torch.utils.tensorboard import SummaryWriter 13 | from torch.optim import Adam 14 | from pprint import pprint 15 | from models.SeqConvVAE import ConvVAE 16 | 17 | 18 | def prepare_log_dir(log_dir=None): 19 | # prepare log dirs 20 | if args.log_dir is None and log_dir is None: 21 | log_dir = datetime.datetime.now().strftime('%m.%d-%H:%M:%S') 22 | elif log_dir is None: 23 | log_dir = args.log_dir 24 | log_dir = os.path.join('logs', log_dir) 25 | print('making save dir at: {}'.format(log_dir)) 26 | os.makedirs(log_dir) 27 | checkpoints_dir = os.path.join(log_dir, 'checkpoints') 28 | tensorboard_dir = os.path.join(log_dir, 'tensorboard') 29 | os.makedirs(checkpoints_dir) 30 | os.makedirs(tensorboard_dir) 31 | return checkpoints_dir, tensorboard_dir 32 | 33 | 34 | class Train: 35 | def __init__(self): 36 | from dataset.local_dataset import AMASSDataset 37 | self.seq_length = args.seq_length 38 | self.attention = args.attention 39 | 40 | self.amass_train_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=True, 41 | windows_size=args.slide_window_step, balance_distrib=args.data_balance, 42 | fps=args.fps, with_mo2cap2_data=args.with_mo2cap2_data) 43 | self.train_dataloader = DataLoader(dataset=self.amass_train_dataset, batch_size=args.batch_size, shuffle=True, 44 | drop_last=True, num_workers=args.num_workers) 45 | 46 | self.test_dataset = AMASSDataset(data_path=args.train_data_path, frame_num=self.seq_length, is_train=False, 47 | windows_size=args.slide_window_step, balance_distrib=args.data_balance, 48 | fps=args.fps, with_mo2cap2_data=args.with_mo2cap2_data) 49 | self.test_dataloader = DataLoader(self.test_dataset, batch_size=args.batch_size, shuffle=False, 50 | drop_last=False, num_workers=args.num_workers) 51 | if args.network == 'cnn': 52 | self.network = ConvVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 53 | seq_len=self.seq_length) 54 | elif args.network == 'rnn': 55 | self.network = RecurrentVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 56 | seq_len=self.seq_length) 57 | elif args.network == 'attention': 58 | self.network = AttentionVAE(in_channels=45, out_channels=45, latent_dim=args.latent_dim, 59 | 
seq_len=self.seq_length, attention_type=self.attention) 60 | elif args.network == 'meva': 61 | self.network = MEVA(nx=45, t_total=self.seq_length, latent_size=args.latent_dim) 62 | 63 | elif args.network == 'vibe': 64 | self.network = RecurrentVAE_VIBE(in_channels=45, out_channels=45, seq_len=self.seq_length, 65 | latent_dim=args.latent_dim, hidden_size=args.latent_dim) 66 | elif args.network == 'mlp': 67 | from networks.models.MLP import MLPVAE 68 | self.network = MLPVAE(in_features=self.seq_length * 45, out_features=self.seq_length * 45, 69 | seq_len=self.seq_length, 70 | latent_dim=args.latent_dim) 71 | else: 72 | raise Exception('wrong network type!') 73 | 74 | self.optimizer = Adam(params=self.network.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay) 75 | # self.l2_loss = torch.nn.MSELoss(reduction='elementwise_mean') 76 | # self.l2_loss = torch.nn.MSELoss() 77 | 78 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 79 | 80 | self.network = self.network.to(self.device) 81 | 82 | self.checkpoint_dir, tensorboard_dir = prepare_log_dir() 83 | self.tensorboard_writer = SummaryWriter(tensorboard_dir) 84 | 85 | 86 | def train(self): 87 | print('---------------------Start Training-----------------------') 88 | pprint(args.__dict__) 89 | running_loss = 0 90 | running_recon_loss = 0 91 | count = 0 92 | for e in range(args.epoch): 93 | self.network.train() 94 | print('-----------------Epoch: {}-----------------'.format(str(e))) 95 | if args.new_dataset: 96 | for i, (relative_global_pose, local_pose, camera_matrix) in tqdm(enumerate(self.train_dataloader)): 97 | self.optimizer.zero_grad() 98 | # relative_global_pose = relative_global_pose.to(self.device) 99 | local_pose = local_pose.to(self.device) 100 | # camera_matrix = camera_matrix.to(self.device) 101 | 102 | pose_preds, pose_input, mu, log_var = self.network(local_pose) 103 | loss, recon_loss, kld_loss = self.network.loss_function(pose_preds, pose_input, mu, log_var, 104 | M_N=args.kl_weight * 64 / len( 105 | self.amass_train_dataset)) 106 | loss.backward() 107 | self.optimizer.step() 108 | 109 | running_loss += loss.item() 110 | running_recon_loss += recon_loss.item() 111 | # logs 112 | if count % args.log_step == 0 and count != 0: 113 | # ...log the running loss 114 | self.tensorboard_writer.add_scalar('training loss', running_loss, count) 115 | self.tensorboard_writer.add_scalar('training local recon loss', running_recon_loss, count) 116 | print("running loss is: {}".format(running_loss)) 117 | print("running local recon loss is: {}".format(running_recon_loss)) 118 | running_loss = 0 119 | running_recon_loss = 0 120 | count += 1 121 | else: 122 | for i, local_pose in tqdm(enumerate(self.train_dataloader)): 123 | self.optimizer.zero_grad() 124 | local_pose = local_pose.to(self.device) 125 | 126 | pose_preds, pose_input, mu, log_var = self.network(local_pose) 127 | loss, recon_loss, kld_loss = self.network.loss_function(pose_preds, pose_input, mu, log_var, 128 | M_N=args.kl_weight * 64 / len( 129 | self.amass_train_dataset)) 130 | loss.backward() 131 | self.optimizer.step() 132 | 133 | running_loss += loss.item() 134 | running_recon_loss += recon_loss.item() 135 | # logs 136 | if count % args.log_step == 0 and count != 0: 137 | # ...log the running loss 138 | self.tensorboard_writer.add_scalar('training loss', running_loss, count) 139 | self.tensorboard_writer.add_scalar('training local recon loss', running_recon_loss, count) 140 | print("running loss is: {}".format(running_loss)) 
141 | print("running local recon loss is: {}".format(running_recon_loss)) 142 | running_loss = 0 143 | running_recon_loss = 0 144 | count += 1 145 | # log the logs 146 | eval_loss = self.eval() 147 | self.tensorboard_writer.add_scalar('eval loss', eval_loss, e) 148 | print('eval loss is: {}'.format(eval_loss)) 149 | 150 | torch.save({ 151 | 'epoch': e + 1, 152 | 'args': args.__dict__, 153 | 'state_dict': self.network.state_dict(), 154 | 'eval_result': eval_loss, 155 | 'optimizer': self.optimizer.state_dict(), 156 | }, os.path.join(self.checkpoint_dir, str(e) + '.pth.tar')) 157 | 158 | def eval(self): 159 | print('---------------------Start Eval-----------------------') 160 | self.network.eval() 161 | mpjpe_list = [] 162 | with torch.no_grad(): 163 | if args.new_dataset: 164 | for i, (relative_global_pose, local_pose, camera_matrix) in tqdm(enumerate(self.test_dataloader)): 165 | local_pose = local_pose.to(self.device) 166 | pose_preds, pose_input, _, _ = self.network(local_pose) 167 | pose_preds = pose_preds.cpu().numpy() 168 | pose_input = pose_input.cpu().numpy() 169 | pose_preds = np.reshape(pose_preds, [-1, self.seq_length, 15, 3]) 170 | pose_gt = np.reshape(pose_input, [-1, self.seq_length, 15, 3]) 171 | mpjpe_list.append(self.mpjpe(pose_preds, pose_gt)) 172 | else: 173 | for i, local_pose in tqdm(enumerate(self.test_dataloader)): 174 | local_pose = local_pose.to(self.device) 175 | pose_preds, pose_input, _, _ = self.network(local_pose) 176 | pose_preds = pose_preds.cpu().numpy() 177 | pose_input = pose_input.cpu().numpy() 178 | pose_preds = np.reshape(pose_preds, [-1, self.seq_length, 15, 3]) 179 | pose_gt = np.reshape(pose_input, [-1, self.seq_length, 15, 3]) 180 | mpjpe_list.append(self.mpjpe(pose_preds, pose_gt)) 181 | 182 | print('MPJPE is: {}'.format(np.mean(mpjpe_list))) 183 | return np.mean(mpjpe_list) 184 | 185 | def mpjpe(self, pose_preds, pose_gt): 186 | distance = np.linalg.norm(pose_gt - pose_preds, axis=3) 187 | return np.mean(distance) 188 | 189 | 190 | if __name__ == '__main__': 191 | train = Train() 192 | train.train() 193 | -------------------------------------------------------------------------------- /networks/models/SeqConvVAE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from models.BaseVAE import BaseVAE 3 | # from networks.BaseVAE import BaseVAE 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from copy import deepcopy 7 | 8 | 9 | class ConvVAE(BaseVAE): 10 | kinematic_parents = [0, 0, 1, 2, 0, 4, 5, 1, 7, 8, 9, 4, 11, 12, 13] 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | latent_dim, 15 | seq_len, 16 | hidden_dims=None, 17 | with_bone_length=False, 18 | **kwargs) -> None: 19 | super(ConvVAE, self).__init__() 20 | 21 | self.latent_dim = latent_dim 22 | self.in_channels = in_channels 23 | self.out_channels = out_channels 24 | self.seq_len = seq_len 25 | self.with_bone_length = with_bone_length 26 | 27 | modules = [] 28 | self.encoder_hidden_dims = deepcopy(hidden_dims) 29 | if hidden_dims is None: 30 | hidden_dims = [64, 64, 128, 256, 512] 31 | 32 | # Build Encoder 33 | for h_dim in hidden_dims: 34 | modules.append( 35 | nn.Sequential( 36 | nn.Conv1d(in_channels, out_channels=h_dim, kernel_size=3, stride=1, padding=1), 37 | nn.BatchNorm1d(h_dim), 38 | nn.LeakyReLU()) 39 | ) 40 | in_channels = h_dim 41 | 42 | self.encoder = nn.Sequential(*modules) 43 | self.encoder_out_dim = hidden_dims[-1] * self.seq_len 44 | self.fc_mu = nn.Linear(self.encoder_out_dim, 
latent_dim) 45 | self.fc_var = nn.Linear(self.encoder_out_dim, latent_dim) 46 | 47 | if self.with_bone_length is True: 48 | self.bone_length_encode_layer = nn.Sequential( 49 | nn.Linear(in_features=self.seq_len * 15, out_features=512), 50 | nn.BatchNorm1d(num_features=512), 51 | nn.LeakyReLU() 52 | ) 53 | self.fusion_layer = nn.Sequential( 54 | nn.Linear(in_features=512 + hidden_dims[-1] * self.seq_len, out_features=hidden_dims[-1] * self.seq_len), 55 | nn.BatchNorm1d(num_features=hidden_dims[-1] * self.seq_len), 56 | nn.LeakyReLU() 57 | ) 58 | 59 | # Build Decoder 60 | modules = [] 61 | 62 | self.decoder_input = nn.Linear(latent_dim, hidden_dims[-1] * self.seq_len) 63 | 64 | hidden_dims.reverse() 65 | self.decoder_hidden_dims = deepcopy(hidden_dims) 66 | 67 | for i in range(len(hidden_dims) - 1): 68 | modules.append( 69 | nn.Sequential( 70 | nn.ConvTranspose1d(hidden_dims[i], 71 | hidden_dims[i + 1], 72 | kernel_size=3, 73 | stride=1, 74 | padding=1, 75 | output_padding=0), 76 | nn.BatchNorm1d(hidden_dims[i + 1]), 77 | nn.LeakyReLU()) 78 | ) 79 | 80 | self.decoder = nn.Sequential(*modules) 81 | 82 | self.final_layer = nn.Sequential( 83 | nn.ConvTranspose1d(hidden_dims[-1], 84 | hidden_dims[-1], 85 | kernel_size=3, 86 | stride=1, 87 | padding=1, 88 | output_padding=0), 89 | nn.BatchNorm1d(hidden_dims[-1]), 90 | nn.LeakyReLU(), 91 | nn.Conv1d(hidden_dims[-1], out_channels=self.out_channels, 92 | kernel_size=3, padding=1)) 93 | 94 | 95 | 96 | 97 | def encode(self, pose_input: torch.Tensor): 98 | """ 99 | Encodes the input by passing through the encoder network 100 | and returns the latent codes. 101 | :param pose_input: (Tensor) Input tensor to encoder [N x C x H x W] 102 | :return: (Tensor) List of latent codes 103 | """ 104 | result = self.encoder(pose_input) 105 | result = torch.flatten(result, start_dim=1) 106 | 107 | if self.with_bone_length is True: 108 | bone_length_encoding = self.bone_length_encoding(pose_input) 109 | result = torch.cat([result, bone_length_encoding], dim=1) 110 | result = self.fusion_layer(result) 111 | # Split the result into mu and var components 112 | # of the latent Gaussian distribution 113 | mu = self.fc_mu(result) 114 | log_var = self.fc_var(result) 115 | 116 | return [mu, log_var] 117 | 118 | def decode(self, z: torch.Tensor): 119 | """ 120 | Maps the given latent codes 121 | onto the image space. 
122 | :param z: (Tensor) [B x D] 123 | :return: (Tensor) [B x C x H x W] 124 | """ 125 | result = self.decoder_input(z) 126 | result = result.view(-1, self.decoder_hidden_dims[0], self.seq_len) 127 | result = self.decoder(result) 128 | result = self.final_layer(result) 129 | return result 130 | 131 | def decode_to_bodypose(self, z: torch.Tensor): 132 | # result: (n_batch, 45, seq_len) 133 | # output: (n_batch, seq_len, 15, 3) 134 | result = self.decoder_input(z) 135 | result = result.view(-1, self.decoder_hidden_dims[0], self.seq_len) 136 | result = self.decoder(result) 137 | result = self.final_layer(result) 138 | assert result.shape[1] == 45 139 | output = result.permute([0, 2, 1]).view((-1, self.seq_len, 15, 3)) 140 | return output 141 | 142 | def calculate_bone_length(self, skeleton: torch.Tensor): 143 | # skeleton shape: (seq_len, 15 * 3) 144 | skeleton = skeleton.view((-1, self.seq_len, 15, 3)) 145 | bone_array = skeleton - skeleton[:, :, self.kinematic_parents, :] 146 | bone_length = torch.norm(bone_array, dim=3) 147 | return bone_length 148 | 149 | def bone_length_encoding(self, pose_input): 150 | pose_input = pose_input.view(-1, self.seq_len, 15, 3) 151 | # calculate the bone length 152 | bone_length = self.calculate_bone_length(pose_input) 153 | bone_length = bone_length.view(-1, self.seq_len * 15) 154 | bone_length_encoding = self.bone_length_encode_layer(bone_length) 155 | return bone_length_encoding 156 | 157 | 158 | 159 | def reparameterize(self, mu: torch.Tensor, logvar: torch.Tensor): 160 | """ 161 | Reparameterization trick to sample from N(mu, var) from 162 | N(0,1). 163 | :param mu: (Tensor) Mean of the latent Gaussian [B x D] 164 | :param logvar: (Tensor) Standard deviation of the latent Gaussian [B x D] 165 | :return: (Tensor) [B x D] 166 | """ 167 | std = torch.exp(0.5 * logvar) 168 | eps = torch.randn_like(std) 169 | return eps * std + mu 170 | 171 | def forward(self, pose_input: torch.Tensor, **kwargs): 172 | """ 173 | :param pose_input: shape: n_batch, seq_length, 15*3 174 | :param kwargs: 175 | :return: 176 | """ 177 | pose_input_permuted = pose_input.permute((0, 2, 1)).contiguous() 178 | mu, log_var = self.encode(pose_input_permuted) 179 | z = self.reparameterize(mu, log_var) 180 | out = self.decode(z) 181 | out = out.permute((0, 2, 1)) 182 | return out, pose_input, mu, log_var 183 | 184 | def get_latent_space(self, pose_input): 185 | pose_input_permuted = pose_input.permute((0, 2, 1)).contiguous() 186 | mu, log_var = self.encode(pose_input_permuted) 187 | std = torch.exp(0.5 * log_var) 188 | z = self.reparameterize(mu, log_var) 189 | return mu, std, z 190 | 191 | def loss_function(self, 192 | *args, 193 | **kwargs): 194 | """ 195 | Computes the VAE loss function. 
196 | KL(N(\mu, \sigma), N(0, 1)) = \log \frac{1}{\sigma} + \frac{\sigma^2 + \mu^2}{2} - \frac{1}{2} 197 | :param args: 198 | :param kwargs: 199 | :return: 200 | """ 201 | recons = args[0] 202 | input = args[1] 203 | mu = args[2] 204 | log_var = args[3] 205 | 206 | if 'M_N' in kwargs: 207 | kld_weight = kwargs['M_N'] # Account for the minibatch samples from the dataset 208 | # print(kld_weight) 209 | recons_loss = F.mse_loss(recons, input) 210 | 211 | kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim=1), dim=0) 212 | else: 213 | kld_weight = kwargs['kl_weight'] 214 | recons_loss = F.mse_loss(recons, input, reduction='sum') 215 | 216 | kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim=1), dim=0) 217 | 218 | loss = recons_loss + kld_weight * kld_loss 219 | return loss, recons_loss, kld_loss 220 | 221 | def sample(self, num_samples, current_device, **kwargs): 222 | """ 223 | Samples from the latent space and return the corresponding 224 | image space map. 225 | :param num_samples: Number of samples 226 | :param current_device: Device to run the model 227 | :return: (Tensor) 228 | """ 229 | z = torch.randn(num_samples, self.latent_dim) 230 | 231 | z = z.to(current_device) 232 | 233 | samples = self.decode(z) 234 | samples = samples.permute((0, 2, 1)) 235 | return samples 236 | 237 | 238 | def generate(self, x: torch.Tensor, **kwargs): 239 | """ 240 | Given an input image x, returns the reconstructed image 241 | :param x: (Tensor) [B x C x H x W] 242 | :return: (Tensor) [B x C x H x W] 243 | """ 244 | 245 | return self.forward(x)[0] 246 | 247 | if __name__ == '__main__': 248 | convVAE = ConvVAE(in_channels=45, out_channels=45, latent_dim=128, seq_len=5, with_bone_length=True) 249 | pose = torch.zeros(4, 5, 15 * 3) 250 | # pose = pose.permute((0, 2, 1)).contiguous() 251 | result, pose_in, _, _ = convVAE(pose) 252 | print(result.shape) 253 | # print(pose_in) 254 | 255 | # sampled_result = convVAE.sample(20, torch.device('cpu')) 256 | # 257 | # print(sampled_result.shape) -------------------------------------------------------------------------------- /MakeDataForOptimization/bvh_reader/npybvh/bvh.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import re 3 | from transforms3d.euler import euler2mat, mat2euler 4 | 5 | 6 | class BvhJoint: 7 | def __init__(self, name, parent): 8 | self.name = name 9 | self.parent = parent 10 | self.offset = np.zeros(3) 11 | self.channels = [] 12 | self.children = [] 13 | 14 | def add_child(self, child): 15 | self.children.append(child) 16 | 17 | def __repr__(self): 18 | return self.name 19 | 20 | def position_animated(self): 21 | return any([x.endswith('position') for x in self.channels]) 22 | 23 | def rotation_animated(self): 24 | return any([x.endswith('rotation') for x in self.channels]) 25 | 26 | 27 | class Bvh: 28 | def __init__(self): 29 | self.joints = {} 30 | self.root = None 31 | self.keyframes = None 32 | self.frames = 0 33 | self.fps = 0 34 | 35 | def _parse_hierarchy(self, text): 36 | lines = re.split('\\s*\\n+\\s*', text) 37 | 38 | joint_stack = [] 39 | 40 | for line in lines: 41 | words = re.split('\\s+', line) 42 | instruction = words[0] 43 | 44 | if instruction == "JOINT" or instruction == "ROOT": 45 | parent = joint_stack[-1] if instruction == "JOINT" else None 46 | joint = BvhJoint(words[1], parent) 47 | self.joints[joint.name] = joint 48 | if parent: 49 | parent.add_child(joint) 50 | joint_stack.append(joint) 51 | if 
instruction == "ROOT": 52 | self.root = joint 53 | elif instruction == "CHANNELS": 54 | for i in range(2, len(words)): 55 | joint_stack[-1].channels.append(words[i]) 56 | elif instruction == "OFFSET": 57 | for i in range(1, len(words)): 58 | joint_stack[-1].offset[i - 1] = float(words[i]) 59 | elif instruction == "End": 60 | joint = BvhJoint(joint_stack[-1].name + "_end", joint_stack[-1]) 61 | joint_stack[-1].add_child(joint) 62 | joint_stack.append(joint) 63 | self.joints[joint.name] = joint 64 | elif instruction == '}': 65 | joint_stack.pop() 66 | 67 | def _add_pose_recursive(self, joint, offset, poses): 68 | pose = joint.offset + offset 69 | poses.append(pose) 70 | 71 | for c in joint.children: 72 | self._add_pose_recursive(c, pose, poses) 73 | 74 | def plot_hierarchy(self): 75 | import matplotlib.pyplot as plt 76 | from mpl_toolkits.mplot3d import axes3d, Axes3D 77 | poses = [] 78 | self._add_pose_recursive(self.root, np.zeros(3), poses) 79 | pos = np.array(poses) 80 | 81 | fig = plt.figure() 82 | ax = fig.add_subplot(111, projection='3d') 83 | ax.scatter(pos[:, 0], pos[:, 2], pos[:, 1]) 84 | ax.set_xlim(-30, 30) 85 | ax.set_ylim(-30, 30) 86 | ax.set_zlim(-30, 30) 87 | plt.show() 88 | 89 | def parse_motion(self, text): 90 | lines = re.split('\\s*\\n+\\s*', text) 91 | 92 | frame = 0 93 | for line in lines: 94 | if line == '': 95 | continue 96 | words = re.split('\\s+', line) 97 | 98 | if line.startswith("Frame Time:"): 99 | self.fps = round(1 / float(words[2])) 100 | continue 101 | if line.startswith("Frames:"): 102 | self.frames = int(words[1]) 103 | continue 104 | 105 | if self.keyframes is None: 106 | self.keyframes = np.empty((self.frames, len(words)), dtype=np.float32) 107 | 108 | for angle_index in range(len(words)): 109 | self.keyframes[frame, angle_index] = float(words[angle_index]) 110 | 111 | frame += 1 112 | 113 | def parse_string(self, text): 114 | hierarchy, motion = text.split("MOTION") 115 | self._parse_hierarchy(hierarchy) 116 | self.parse_motion(motion) 117 | 118 | def _extract_rotation(self, frame_pose, index_offset, joint): 119 | local_rotation = np.zeros(3) 120 | for channel in joint.channels: 121 | if channel.endswith("position"): 122 | continue 123 | if channel == "Xrotation": 124 | local_rotation[0] = frame_pose[index_offset] 125 | elif channel == "Yrotation": 126 | local_rotation[1] = frame_pose[index_offset] 127 | elif channel == "Zrotation": 128 | local_rotation[2] = frame_pose[index_offset] 129 | else: 130 | raise Exception(f"Unknown channel {channel}") 131 | index_offset += 1 132 | 133 | local_rotation = np.deg2rad(local_rotation) 134 | M_rotation = np.eye(3) 135 | for channel in joint.channels: 136 | if channel.endswith("position"): 137 | continue 138 | 139 | if channel == "Xrotation": 140 | euler_rot = np.array([local_rotation[0], 0., 0.]) 141 | elif channel == "Yrotation": 142 | euler_rot = np.array([0., local_rotation[1], 0.]) 143 | elif channel == "Zrotation": 144 | euler_rot = np.array([0., 0., local_rotation[2]]) 145 | else: 146 | raise Exception(f"Unknown channel {channel}") 147 | 148 | M_channel = euler2mat(*euler_rot) 149 | M_rotation = M_rotation.dot(M_channel) 150 | 151 | return M_rotation, index_offset 152 | 153 | def _extract_position(self, joint, frame_pose, index_offset): 154 | offset_position = np.zeros(3) 155 | for channel in joint.channels: 156 | if channel.endswith("rotation"): 157 | continue 158 | if channel == "Xposition": 159 | offset_position[0] = frame_pose[index_offset] 160 | elif channel == "Yposition": 161 | offset_position[1] = 
frame_pose[index_offset] 162 | elif channel == "Zposition": 163 | offset_position[2] = frame_pose[index_offset] 164 | else: 165 | raise Exception(f"Unknown channel {channel}") 166 | index_offset += 1 167 | 168 | return offset_position, index_offset 169 | 170 | def _recursive_apply_frame(self, joint, frame_pose, index_offset, p, r, M_parent, p_parent): 171 | if joint.position_animated(): 172 | offset_position, index_offset = self._extract_position(joint, frame_pose, index_offset) 173 | else: 174 | offset_position = np.zeros(3) 175 | 176 | if len(joint.channels) == 0: 177 | joint_index = list(self.joints.values()).index(joint) 178 | p[joint_index] = p_parent + M_parent.dot(joint.offset) 179 | r[joint_index] = mat2euler(M_parent) 180 | return index_offset 181 | 182 | if joint.rotation_animated(): 183 | M_rotation, index_offset = self._extract_rotation(frame_pose, index_offset, joint) 184 | else: 185 | M_rotation = np.eye(3) 186 | 187 | M = M_parent.dot(M_rotation) 188 | position = p_parent + M_parent.dot(joint.offset) + offset_position 189 | 190 | rotation = np.rad2deg(mat2euler(M)) 191 | joint_index = list(self.joints.values()).index(joint) 192 | p[joint_index] = position 193 | r[joint_index] = rotation 194 | 195 | for c in joint.children: 196 | index_offset = self._recursive_apply_frame(c, frame_pose, index_offset, p, r, M, position) 197 | 198 | return index_offset 199 | 200 | def frame_pose(self, frame): 201 | p = np.empty((len(self.joints), 3)) 202 | r = np.empty((len(self.joints), 3)) 203 | frame_pose = self.keyframes[frame] 204 | M_parent = np.zeros((3, 3)) 205 | M_parent[0, 0] = 1 206 | M_parent[1, 1] = 1 207 | M_parent[2, 2] = 1 208 | self._recursive_apply_frame(self.root, frame_pose, 0, p, r, M_parent, np.zeros(3)) 209 | 210 | return p, r 211 | 212 | def all_frame_poses(self): 213 | p = np.empty((self.frames, len(self.joints), 3)) 214 | r = np.empty((self.frames, len(self.joints), 3)) 215 | 216 | for frame in range(len(self.keyframes)): 217 | p[frame], r[frame] = self.frame_pose(frame) 218 | 219 | return p, r 220 | 221 | def _plot_pose(self, p, r, fig=None, ax=None): 222 | import matplotlib.pyplot as plt 223 | from mpl_toolkits.mplot3d import axes3d, Axes3D 224 | if fig is None: 225 | fig = plt.figure() 226 | if ax is None: 227 | ax = fig.add_subplot(111, projection='3d') 228 | 229 | ax.cla() 230 | ax.scatter(p[:, 0], p[:, 2], p[:, 1]) 231 | ax.set_xlim(-30, 30) 232 | ax.set_ylim(-30, 30) 233 | ax.set_zlim(-1, 59) 234 | 235 | plt.draw() 236 | plt.pause(0.001) 237 | 238 | def plot_frame(self, frame, fig=None, ax=None): 239 | p, r = self.frame_pose(frame) 240 | self._plot_pose(p, r, fig, ax) 241 | 242 | def joint_names(self): 243 | return self.joints.keys() 244 | 245 | def parse_file(self, path): 246 | with open(path, 'r') as f: 247 | self.parse_string(f.read()) 248 | 249 | def plot_all_frames(self): 250 | import matplotlib.pyplot as plt 251 | from mpl_toolkits.mplot3d import axes3d, Axes3D 252 | fig = plt.figure() 253 | ax = fig.add_subplot(111, projection='3d') 254 | for i in range(self.frames): 255 | self.plot_frame(i, fig, ax) 256 | 257 | def __repr__(self): 258 | return f"BVH {len(self.joints.keys())} joints, {self.frames} frames" 259 | 260 | 261 | if __name__ == '__main__': 262 | # create Bvh parser 263 | anim = Bvh() 264 | # parser file 265 | anim.parse_file("example.bvh") 266 | 267 | # draw the skeleton in T-pose 268 | anim.plot_hierarchy() 269 | 270 | # extract single frame pose: axis0=joint, axis1=positionXYZ/rotationXYZ 271 | p, r = anim.frame_pose(0) 272 | 273 | # extract 
all poses: axis0=frame, axis1=joint, axis2=positionXYZ/rotationXYZ 274 | all_p, all_r = anim.all_frame_poses() 275 | 276 | # print all joints, their positions and orientations 277 | for _p, _r, _j in zip(p, r, anim.joint_names()): 278 | print(f"{_j}: p={_p}, r={_r}") 279 | 280 | # draw the skeleton for the given frame 281 | anim.plot_frame(22) 282 | 283 | # show full animation 284 | anim.plot_all_frames() 285 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import shutil 4 | import numpy as np 5 | from scipy.spatial.transform import Rotation 6 | from copy import deepcopy 7 | import torch 8 | import cv2 9 | 10 | 11 | lines = [(0, 1, 'right'), (0, 4, 'left'), (1, 2, 'right'), (2, 3, 'right'), (4, 5, 'left'), (5, 6, 'left'), (1, 7, 'right'), (4, 11, 'left'), (7, 8, 'right'), (8, 9, 'right'), (9, 10, 'right'), 12 | (11, 12, 'left'), (12, 13, 'left'), (13, 14, 'left'), (7, 11, 'left')] 13 | 14 | def draw_joints(joints, img, color=(0, 255, 0), right_color=(255, 0, 0)): 15 | joints_num = joints.shape[0] 16 | for line in lines: 17 | if line[0] < joints_num and line[1] < joints_num: 18 | start = joints[line[0]].astype(int) 19 | end = joints[line[1]].astype(int) 20 | left_or_right = line[2] 21 | if left_or_right == 'right': 22 | paint_color = right_color 23 | else: 24 | paint_color = color 25 | img = cv2.line(img, (start[0], start[1]), (end[0], end[1]), color=paint_color, thickness=4) 26 | for j in range(joints_num): 27 | img = cv2.circle(img, center=(joints[j][0].astype(int), joints[j][1].astype(int)), 28 | radius=2, color=(0, 0, 255), thickness=6) 29 | 30 | return img 31 | 32 | 33 | def trans_qrot_to_matrix(trans, rot): 34 | # rot is quat 35 | rot_mat = Rotation.from_quat(rot).as_matrix() 36 | mat = np.array([ 37 | np.concatenate([rot_mat[0], [trans[0]]]), 38 | np.concatenate([rot_mat[1], [trans[1]]]), 39 | np.concatenate([rot_mat[2], [trans[2]]]), 40 | [0, 0, 0, 1] 41 | ]) 42 | return mat 43 | 44 | def transformation_matrix_to_translation_and_rotation(mat): 45 | rotation_matrix = mat[:3, :3] 46 | translation = mat[:3, 3] 47 | 48 | # rotation matrix to rotation euler 49 | rotation_euler = Rotation.from_matrix(rotation_matrix).as_euler('xyz') 50 | return rotation_euler, translation 51 | 52 | 53 | def local_skeleton_2_global_skeleton(local_pose_list, cam_2_world_mat): 54 | pass 55 | 56 | def global_skeleton_2_local_skeleton(global_pose, world_2_cam_mat): 57 | global_pose_homo = np.concatenate([global_pose, np.ones((global_pose.shape[0], 1))], axis=1) 58 | local_pose_homo = world_2_cam_mat.dot(global_pose_homo.T).T 59 | return local_pose_homo 60 | 61 | 62 | def transform_pose(pose, matrix): 63 | pose_homo = np.concatenate([pose, np.ones(shape=(pose.shape[0], 1))], axis=1) 64 | new_pose_homo = matrix.dot(pose_homo.T).T 65 | new_pose = new_pose_homo[:, :3] 66 | return new_pose 67 | 68 | def transform_pose_torch(pose, matrix): 69 | to_attach = torch.ones(size=(pose.shape[0], 1)).to(pose.device) 70 | pose_homo = torch.cat([pose, to_attach], dim=1) 71 | new_pose_homo = matrix.mm(pose_homo.T).T 72 | new_pose = new_pose_homo[:, :3] 73 | return new_pose 74 | 75 | def get_concecutive_global_cam(cam_seq, last_cam): 76 | camera_matrix_0 = deepcopy(cam_seq[0]) 77 | concecutive_global_cam = np.empty_like(cam_seq) 78 | camera_matrix_0_inv = np.linalg.inv(camera_matrix_0) 79 | for i, camera_pose_i in enumerate(cam_seq): 80 | camera_matrix_i = camera_pose_i 81
| camera_matrix_i_2_last = last_cam.dot(camera_matrix_0_inv.dot(camera_matrix_i)) 82 | concecutive_global_cam[i] = camera_matrix_i_2_last 83 | return concecutive_global_cam 84 | 85 | def get_relative_global_pose(local_pose_list, camera_pose_list): 86 | # firstly get relative camera pose list 87 | relative_pose_list = [] 88 | camera_pose_0 = deepcopy(camera_pose_list[0]) 89 | camera_matrix_0 = trans_qrot_to_matrix(camera_pose_0['loc'], camera_pose_0['rot']) 90 | camera_matrix_0_inv = np.linalg.inv(camera_matrix_0) 91 | for i, camera_pose_i in enumerate(camera_pose_list): 92 | camera_matrix_i = trans_qrot_to_matrix(camera_pose_i['loc'], camera_pose_i['rot']) 93 | camera_matrix_i_2_0 = camera_matrix_0_inv.dot(camera_matrix_i) 94 | local_pose = local_pose_list[i] 95 | transformed_local_pose = transform_pose(local_pose, camera_matrix_i_2_0) 96 | relative_pose_list.append(transformed_local_pose) 97 | return relative_pose_list 98 | 99 | def get_relative_global_pose_with_camera_matrix(local_pose_list, camera_pose_list): 100 | # firstly get relative camera pose list 101 | relative_pose_list = [] 102 | camera_pose_0 = deepcopy(camera_pose_list[0]) 103 | camera_matrix_0 = camera_pose_0 104 | camera_matrix_0_inv = np.linalg.inv(camera_matrix_0) 105 | for i, camera_pose_i in enumerate(camera_pose_list): 106 | camera_matrix_i = camera_pose_i 107 | camera_matrix_i_2_0 = camera_matrix_0_inv.dot(camera_matrix_i) 108 | local_pose = local_pose_list[i] 109 | transformed_local_pose = transform_pose(local_pose, camera_matrix_i_2_0) 110 | relative_pose_list.append(transformed_local_pose) 111 | 112 | return np.asarray(relative_pose_list) 113 | 114 | def get_global_pose_from_relative_global_pose(relative_global_pose_list, initial_camera_matrix): 115 | global_pose_list = np.zeros_like(relative_global_pose_list) 116 | for i, relative_global_pose in enumerate(relative_global_pose_list): 117 | global_pose = transform_pose(relative_global_pose, initial_camera_matrix) 118 | global_pose_list[i] = global_pose 119 | return global_pose_list 120 | 121 | def get_relative_camera_matrix(camera_pose_1, camera_pose_2): 122 | camera_matrix_1_inv = torch.inverse(camera_pose_1) 123 | camera_matrix_2_to_1 = camera_matrix_1_inv @ camera_pose_2 124 | return camera_matrix_2_to_1 125 | 126 | def get_relative_global_pose_with_camera_matrix_torch(local_pose_list, camera_pose_list): 127 | # firstly get relative camera pose list 128 | # relative_pose_list = [] 129 | relative_pose_list = torch.zeros_like(local_pose_list) 130 | camera_pose_0 = camera_pose_list[0].detach().clone() 131 | camera_matrix_0 = camera_pose_0 132 | camera_matrix_0_inv = torch.inverse(camera_matrix_0) 133 | for i, camera_pose_i in enumerate(camera_pose_list): 134 | camera_matrix_i = camera_pose_i 135 | camera_matrix_i_2_0 = camera_matrix_0_inv.mm(camera_matrix_i) 136 | local_pose = local_pose_list[i] 137 | transformed_local_pose = transform_pose_torch(local_pose, camera_matrix_i_2_0) 138 | relative_pose_list[i] = transformed_local_pose 139 | return relative_pose_list 140 | 141 | def get_relative_transform_from_blender(location1, rotation1, location2, rotation2): 142 | # input: location and rotation in blender coordinate 143 | # out: rotation, translation and transform matrix in OpenCV coordinate 144 | T_world2cv1, R_world2cv1, mat_world2cv1 = get_cv_rt_from_blender(location1, rotation1) 145 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_blender(location2, rotation2) 146 | 147 | mat_cv1_2world = np.linalg.inv(mat_world2cv1) 148 | 149 | mat_cv1_to_cv2 = 
mat_cv1_2world.dot(mat_world2cv2) 150 | # mat cv1 to cv2 is the coordinate transformation, we need to change it to the object transformation 151 | mat_cv2_to_cv1 = np.linalg.inv(mat_cv1_to_cv2) 152 | 153 | rotation, translation = transformation_matrix_to_translation_and_rotation(mat_cv2_to_cv1) 154 | return rotation, translation, mat_cv2_to_cv1 155 | 156 | 157 | def get_transform_relative_to_base_cv(base_location, base_rotation, location, rotation): 158 | # input: location and rotation in blender coordinate 159 | # out: rotation, translation and transform matrix in OpenCV coordinate 160 | T_world2cv_base, R_world2cv_base, mat_world2cv_base = get_cv_rt_from_cv(base_location, base_rotation) 161 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_cv(location, rotation) 162 | 163 | # mat_cv2world2 = np.linalg.inv(mat_world2cv2) 164 | # location_cv_homo = mat_cv2world2[:, 3] 165 | 166 | location_cv_homo = np.concatenate([location, np.ones(1)]) 167 | 168 | R_cv2_2_base = R_world2cv2.T.dot(R_world2cv_base) 169 | new_rotation_euler = Rotation.from_matrix(R_cv2_2_base).as_euler(seq='xyz') 170 | 171 | new_location_homo = mat_world2cv_base.dot(location_cv_homo) 172 | new_location = new_location_homo[:3] 173 | 174 | return new_location, new_rotation_euler 175 | 176 | def get_transform_relative_to_base_blender(base_location, base_rotation, location, rotation): 177 | T_world2cv_base, R_world2cv_base, mat_world2cv_base = get_cv_rt_from_blender(base_location, base_rotation) 178 | T_world2cv2, R_world2cv2, mat_world2cv2 = get_cv_rt_from_blender(location, rotation) 179 | 180 | location_cv_homo = np.concatenate([location, np.ones(1)]) 181 | 182 | R_cv2_2_base = R_world2cv2.T.dot(R_world2cv_base) 183 | new_rotation_euler = Rotation.from_matrix(R_cv2_2_base).as_euler(seq='xyz') 184 | 185 | new_location_homo = mat_world2cv_base.dot(location_cv_homo) 186 | new_location = new_location_homo[:3] 187 | 188 | return new_location, new_rotation_euler 189 | 190 | # code modified from zaw lin 191 | def get_cv_rt_from_blender(location, rotation): 192 | # bcam stands for blender camera 193 | R_bcam2cv = np.array( 194 | [[1, 0, 0], 195 | [0, -1, 0], 196 | [0, 0, -1]]) 197 | 198 | # Transpose since the rotation is object rotation, 199 | # and we want coordinate rotation 200 | # R_world2bcam = cam.rotation_euler.to_matrix().transposed() 201 | # T_world2bcam = -1*R_world2bcam * location 202 | # 203 | # Use matrix_world instead to account for all constraints 204 | 205 | R_world2bcam = Rotation.from_euler('xyz', rotation, degrees=False).as_matrix().T 206 | 207 | # Convert camera location to translation vector used in coordinate changes 208 | # T_world2bcam = -1*R_world2bcam*cam.location 209 | # Use location from matrix_world to account for constraints: 210 | T_world2bcam = -1 * R_world2bcam.dot(location) 211 | 212 | # Build the coordinate transform matrix from world to computer vision camera 213 | R_world2cv = R_bcam2cv.dot(R_world2bcam) 214 | T_world2cv = R_bcam2cv.dot(T_world2bcam) 215 | 216 | #put into 3x4 matrix 217 | mat = np.array([ 218 | np.concatenate([R_world2cv[0], [T_world2cv[0]]]), 219 | np.concatenate([R_world2cv[1], [T_world2cv[1]]]), 220 | np.concatenate([R_world2cv[2], [T_world2cv[2]]]), 221 | [0, 0, 0, 1] 222 | ]) 223 | return T_world2cv, R_world2cv, mat 224 | 225 | # code modified from zaw lin 226 | def get_cv_rt_from_cv(location, rotation): 227 | 228 | # Transpose since the rotation is object rotation, 229 | # and we want coordinate rotation 230 | # R_world2bcam = 
cam.rotation_euler.to_matrix().transposed() 231 | # T_world2bcam = -1*R_world2bcam * location 232 | # 233 | # Use matrix_world instead to account for all constraints 234 | 235 | R_world2cv = Rotation.from_euler('xyz', rotation, degrees=False).as_matrix().T 236 | 237 | # Convert camera location to translation vector used in coordinate changes 238 | # T_world2bcam = -1*R_world2bcam*cam.location 239 | # Use location from matrix_world to account for constraints: 240 | T_world2cv = -1 * R_world2cv.dot(location) 241 | 242 | #put into 3x4 matrix 243 | mat = np.array([ 244 | np.concatenate([R_world2cv[0], [T_world2cv[0]]]), 245 | np.concatenate([R_world2cv[1], [T_world2cv[1]]]), 246 | np.concatenate([R_world2cv[2], [T_world2cv[2]]]), 247 | [0, 0, 0, 1] 248 | ]) 249 | return T_world2cv, R_world2cv, mat -------------------------------------------------------------------------------- /utils/skeleton.py: -------------------------------------------------------------------------------- 1 | # pose visualizer 2 | # 1. read and generate 3D skeleton from heat map and depth 3 | # 2. convert 3D skeleton to skeleton mesh 4 | from utils.fisheye.FishEyeEquisolid import FishEyeCameraEquisolid 5 | from utils.fisheye.FishEyeCalibrated import FishEyeCameraCalibrated 6 | import numpy as np 7 | import open3d 8 | from utils.pose_visualization_utils import get_cylinder, get_sphere 9 | from scipy.io import loadmat 10 | import cv2 11 | import os 12 | from tqdm import tqdm 13 | from scipy.ndimage.filters import gaussian_filter1d 14 | 15 | 16 | class Skeleton: 17 | heatmap_sequence = ["Neck", "Right_shoulder", "Right_elbow", "Right_wrist", "Left_shoulder", "Left_elbow", 18 | "Left_wrist", "Right_hip", "Right_knee", "Right_ankle", "Right_foot", "Left_hip", 19 | "Left_knee", "Left_ankle", "Left_foot"] 20 | lines = [(0, 1), (0, 4), (1, 2), (2, 3), (4, 5), (5, 6), (1, 7), (4, 11), (7, 8), (8, 9), (9, 10), 21 | (11, 12), (12, 13), (13, 14), (7, 11)] 22 | kinematic_parents = [0, 0, 1, 2, 0, 4, 5, 1, 7, 8, 9, 4, 11, 12, 13] 23 | 24 | def __init__(self, calibration_path): 25 | 26 | self.skeleton = None 27 | self.skeleton_mesh = None 28 | if calibration_path is None: 29 | self.camera = FishEyeCameraEquisolid(focal_length=9, sensor_size=32, img_size=(1280, 1024)) 30 | else: 31 | self.camera = FishEyeCameraCalibrated(calibration_file_path=calibration_path) 32 | 33 | def set_skeleton(self, heatmap, depth, bone_length=None, to_mesh=True): 34 | heatmap = np.expand_dims(heatmap, axis=0) 35 | preds, _ = self.get_max_preds(heatmap) 36 | pred = preds[0] 37 | 38 | points_3d = self.camera.camera2world(pred, depth) 39 | # print('------------------------') 40 | # print(self.camera.camera2world(np.array([[640, 1000]]), np.array([1]))) 41 | if bone_length is not None: 42 | points_3d = self._skeleton_resize(points_3d, bone_length) 43 | self.skeleton = points_3d 44 | if to_mesh: 45 | self.skeleton_to_mesh() 46 | return self.skeleton 47 | 48 | def joints_2_mesh(self, joints_3d): 49 | self.skeleton = joints_3d 50 | self.skeleton_to_mesh() 51 | skeleton_mesh = self.skeleton_mesh 52 | self.skeleton_mesh = None 53 | self.skeleton = None 54 | return skeleton_mesh 55 | 56 | def joint_list_2_mesh_list(self, joints_3d_list): 57 | mesh_list = [] 58 | for joints_3d in joints_3d_list: 59 | mesh_list.append(self.joints_2_mesh(joints_3d)) 60 | return mesh_list 61 | 62 | def get_skeleton_mesh(self): 63 | if self.skeleton_mesh is None: 64 | raise Exception("Skeleton is not prepared.") 65 | else: 66 | return self.skeleton_mesh 67 | 68 | def save_skeleton_mesh(self, 
out_path): 69 | if self.skeleton_mesh is None: 70 | raise Exception("Skeleton is not prepared.") 71 | else: 72 | open3d.io.write_triangle_mesh(out_path, mesh=self.skeleton_mesh) 73 | 74 | def set_skeleton_from_file(self, heatmap_file, depth_file, bone_length_file=None, to_mesh=True): 75 | # load the average bone length 76 | if bone_length_file is not None: 77 | bone_length_mat = loadmat(bone_length_file) 78 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 79 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 80 | bone_length = np.linalg.norm(bones_mean, axis=1) 81 | else: 82 | bone_length = None 83 | heatmap_mat = loadmat(heatmap_file) 84 | depth_mat = loadmat(depth_file) 85 | depth = depth_mat['depth'][0] 86 | heatmap = heatmap_mat['heatmap'] 87 | heatmap = cv2.resize(heatmap, dsize=(1024, 1024), interpolation=cv2.INTER_NEAREST) 88 | heatmap = np.pad(heatmap, ((0, 0), (128, 128), (0, 0)), 'constant', constant_values=0) 89 | heatmap = heatmap.transpose((2, 0, 1)) 90 | return self.set_skeleton(heatmap, depth, bone_length, to_mesh) 91 | 92 | def skeleton_resize_seq(self, joint_list, bone_length_file): 93 | bone_length_mat = loadmat(bone_length_file) 94 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 95 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 96 | bone_length = np.linalg.norm(bones_mean, axis=1) 97 | 98 | for i in range(len(joint_list)): 99 | joint_list[i] = self._skeleton_resize(joint_list[i], bone_length) 100 | return joint_list 101 | 102 | def skeleton_resize_single(self, joint, bone_length_file): 103 | bone_length_mat = loadmat(bone_length_file) 104 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 105 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 106 | bone_length = np.linalg.norm(bones_mean, axis=1) 107 | 108 | joint = self._skeleton_resize(joint, bone_length) 109 | return joint 110 | 111 | def skeleton_resize_standard_skeleton(self, joint_input, joint_standard): 112 | """ 113 | 114 | :param joint_input: input joint shape: 15 * 3 115 | :param joint_standard: standard joint shape: 15 * 3 116 | :return: 117 | """ 118 | bones_mean = joint_standard - joint_standard[self.kinematic_parents, :] 119 | bone_length = np.linalg.norm(bones_mean, axis=1) * 1000. 120 | 121 | joint = self._skeleton_resize(joint_input, bone_length) 122 | return joint 123 | 124 | def _skeleton_resize(self, points_3d, bone_length): 125 | # resize the skeleton to the normal size (why we should do that?) 
126 | estimated_bone_vec = points_3d - points_3d[self.kinematic_parents, :] 127 | estimated_bone_length = np.linalg.norm(estimated_bone_vec, axis=1) 128 | multi = bone_length[1:] / estimated_bone_length[1:] 129 | multi = np.concatenate(([0], multi)) 130 | multi = np.stack([multi] * 3, axis=1) 131 | resized_bones_vec = estimated_bone_vec * multi / 1000 132 | 133 | joints_rescaled = points_3d 134 | for i in range(joints_rescaled.shape[0]): 135 | joints_rescaled[i, :] = joints_rescaled[self.kinematic_parents[i], :] + resized_bones_vec[i, :] 136 | return joints_rescaled 137 | 138 | def render(self): 139 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=1) 140 | open3d.visualization.draw_geometries([self.skeleton_mesh, mesh_frame]) 141 | 142 | def skeleton_to_mesh(self): 143 | final_mesh = open3d.geometry.TriangleMesh() 144 | for i in range(len(self.skeleton)): 145 | keypoint_mesh = get_sphere(position=self.skeleton[i], radius=0.02) 146 | final_mesh = final_mesh + keypoint_mesh 147 | 148 | for line in self.lines: 149 | line_start_i = line[0] 150 | line_end_i = line[1] 151 | 152 | start_point = self.skeleton[line_start_i] 153 | end_point = self.skeleton[line_end_i] 154 | 155 | line_mesh = get_cylinder(start_point, end_point, radius=0.005) 156 | final_mesh += line_mesh 157 | self.skeleton_mesh = final_mesh 158 | return final_mesh 159 | 160 | def smooth(self, pose_sequence, sigma): 161 | """ 162 | gaussian smooth pose 163 | :param pose_sequence_2d: pose sequence, input is a list with every element is 15 * 2 body pose 164 | :param kernel_size: kernel size of guassian smooth 165 | :return: smoothed 2d pose 166 | """ 167 | pose_sequence = np.asarray(pose_sequence) 168 | pose_sequence_result = np.zeros_like(pose_sequence) 169 | keypoint_num = pose_sequence.shape[1] 170 | for i in range(keypoint_num): 171 | pose_sequence_i = pose_sequence[:, i, :] 172 | pose_sequence_filtered = gaussian_filter1d(pose_sequence_i, sigma, axis=0) 173 | pose_sequence_result[:, i, :] = pose_sequence_filtered 174 | return pose_sequence_result 175 | 176 | def get_max_preds(self, batch_heatmaps): 177 | ''' 178 | get predictions from score maps 179 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 180 | ''' 181 | assert isinstance(batch_heatmaps, np.ndarray), \ 182 | 'batch_heatmaps should be numpy.ndarray' 183 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 184 | 185 | batch_size = batch_heatmaps.shape[0] 186 | num_joints = batch_heatmaps.shape[1] 187 | width = batch_heatmaps.shape[3] 188 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 189 | idx = np.argmax(heatmaps_reshaped, 2) 190 | maxvals = np.amax(heatmaps_reshaped, 2) 191 | 192 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 193 | idx = idx.reshape((batch_size, num_joints, 1)) 194 | 195 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 196 | 197 | preds[:, :, 0] = (preds[:, :, 0]) % width 198 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 199 | 200 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 201 | pred_mask = pred_mask.astype(np.float32) 202 | 203 | preds *= pred_mask 204 | return preds, maxvals 205 | 206 | 207 | if __name__ == '__main__': 208 | skeleton = Skeleton( 209 | calibration_path='/home/wangjian/Develop/egocentricvisualization/pose/fisheye/fisheye.calibration.json') 210 | data_path = r'/home/wangjian/Develop/egocentricvisualization/data_2' 211 | heatmap_dir = os.path.join(data_path, 'heatmaps') 212 | depth_dir = os.path.join(data_path, 
'depths') 213 | out_dir = os.path.join(data_path, 'smooth_skeleton_mesh') 214 | if not os.path.isdir(out_dir): 215 | os.mkdir(out_dir) 216 | skeleon_list = [] 217 | out_path_list = [] 218 | for heatmap_name in tqdm(sorted(os.listdir(heatmap_dir))): 219 | heatmap_path = os.path.join(heatmap_dir, heatmap_name) 220 | mat_id = heatmap_name 221 | depth_path = os.path.join(depth_dir, mat_id) 222 | 223 | skeleton_array = skeleton.set_skeleton_from_file(heatmap_path, 224 | depth_path, 225 | # bone_length_file=r'/home/wangjian/Develop/egocentricvisualization/pose/fisheye/mean3D.mat', 226 | to_mesh=False) 227 | out_path = os.path.join(out_dir, mat_id + ".ply") 228 | skeleon_list.append(skeleton_array) 229 | out_path_list.append(out_path) 230 | 231 | smoothed_skeleton = skeleton.smooth(skeleon_list, sigma=1) 232 | print("saving to ply") 233 | for i in tqdm(range(len(smoothed_skeleton))): 234 | skeleton.skeleton = smoothed_skeleton[i] 235 | skeleton.skeleton_to_mesh() 236 | skeleton.save_skeleton_mesh(out_path_list[i]) 237 | 238 | # skeleton.set_skeleton_from_file(r'X:\Mo2Cap2Plus\static00\Datasets\Mo2Cap2\ego_system_test\sitting\heatmaps\img-04052020001910-937.mat', 239 | # r'X:\Mo2Cap2Plus\static00\Datasets\Mo2Cap2\ego_system_test\sitting\depths\img-04052020001910-937.mat', 240 | # # bone_length_file=r'F:\Develop\EgocentricSystemVisualization\pose\fisheye\mean3D.mat') 241 | # ) 242 | # 243 | # skeleton.render() 244 | # print(skeleton.skeleton) 245 | -------------------------------------------------------------------------------- /MakeDataForOptimization/utils/skeleton.py: -------------------------------------------------------------------------------- 1 | # pose visualizer 2 | # 1. read and generate 3D skeleton from heat map and depth 3 | # 2. convert 3D skeleton to skeleton mesh 4 | from utils.fisheye.FishEyeEquisolid import FishEyeCameraEquisolid 5 | from utils.fisheye.FishEyeCalibrated import FishEyeCameraCalibrated 6 | import numpy as np 7 | import open3d 8 | from utils.pose_visualization_utils import get_cylinder, get_sphere 9 | from scipy.io import loadmat 10 | import cv2 11 | import os 12 | from tqdm import tqdm 13 | from scipy.ndimage.filters import gaussian_filter1d 14 | 15 | 16 | class Skeleton: 17 | heatmap_sequence = ["Neck", "Right_shoulder", "Right_elbow", "Right_wrist", "Left_shoulder", "Left_elbow", 18 | "Left_wrist", "Right_hip", "Right_knee", "Right_ankle", "Right_foot", "Left_hip", 19 | "Left_knee", "Left_ankle", "Left_foot"] 20 | lines = [(0, 1), (0, 4), (1, 2), (2, 3), (4, 5), (5, 6), (1, 7), (4, 11), (7, 8), (8, 9), (9, 10), 21 | (11, 12), (12, 13), (13, 14), (7, 11)] 22 | kinematic_parents = [0, 0, 1, 2, 0, 4, 5, 1, 7, 8, 9, 4, 11, 12, 13] 23 | 24 | def __init__(self, calibration_path): 25 | 26 | self.skeleton = None 27 | self.skeleton_mesh = None 28 | if calibration_path is None: 29 | self.camera = FishEyeCameraEquisolid(focal_length=9, sensor_size=32, img_size=(1280, 1024)) 30 | else: 31 | self.camera = FishEyeCameraCalibrated(calibration_file_path=calibration_path) 32 | 33 | def set_skeleton(self, heatmap, depth, bone_length=None, to_mesh=True): 34 | heatmap = np.expand_dims(heatmap, axis=0) 35 | preds, _ = self.get_max_preds(heatmap) 36 | pred = preds[0] 37 | 38 | points_3d = self.camera.camera2world(pred, depth) 39 | # print('------------------------') 40 | # print(self.camera.camera2world(np.array([[640, 1000]]), np.array([1]))) 41 | if bone_length is not None: 42 | points_3d = self._skeleton_resize(points_3d, bone_length) 43 | self.skeleton = points_3d 44 | if to_mesh: 
45 | self.skeleton_to_mesh() 46 | return self.skeleton 47 | 48 | def joints_2_mesh(self, joints_3d): 49 | self.skeleton = joints_3d 50 | self.skeleton_to_mesh() 51 | skeleton_mesh = self.skeleton_mesh 52 | self.skeleton_mesh = None 53 | self.skeleton = None 54 | return skeleton_mesh 55 | 56 | def joint_list_2_mesh_list(self, joints_3d_list): 57 | mesh_list = [] 58 | for joints_3d in joints_3d_list: 59 | mesh_list.append(self.joints_2_mesh(joints_3d)) 60 | return mesh_list 61 | 62 | def get_skeleton_mesh(self): 63 | if self.skeleton_mesh is None: 64 | raise Exception("Skeleton is not prepared.") 65 | else: 66 | return self.skeleton_mesh 67 | 68 | def save_skeleton_mesh(self, out_path): 69 | if self.skeleton_mesh is None: 70 | raise Exception("Skeleton is not prepared.") 71 | else: 72 | open3d.io.write_triangle_mesh(out_path, mesh=self.skeleton_mesh) 73 | 74 | def set_skeleton_from_file(self, heatmap_file, depth_file, bone_length_file=None, to_mesh=True): 75 | # load the average bone length 76 | if bone_length_file is not None: 77 | bone_length_mat = loadmat(bone_length_file) 78 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 79 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 80 | bone_length = np.linalg.norm(bones_mean, axis=1) 81 | else: 82 | bone_length = None 83 | heatmap_mat = loadmat(heatmap_file) 84 | depth_mat = loadmat(depth_file) 85 | depth = depth_mat['depth'][0] 86 | heatmap = heatmap_mat['heatmap'] 87 | heatmap = cv2.resize(heatmap, dsize=(1024, 1024), interpolation=cv2.INTER_NEAREST) 88 | heatmap = np.pad(heatmap, ((0, 0), (128, 128), (0, 0)), 'constant', constant_values=0) 89 | heatmap = heatmap.transpose((2, 0, 1)) 90 | return self.set_skeleton(heatmap, depth, bone_length, to_mesh) 91 | 92 | def skeleton_resize_seq(self, joint_list, bone_length_file): 93 | bone_length_mat = loadmat(bone_length_file) 94 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 95 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 96 | bone_length = np.linalg.norm(bones_mean, axis=1) 97 | 98 | for i in range(len(joint_list)): 99 | joint_list[i] = self._skeleton_resize(joint_list[i], bone_length) 100 | return joint_list 101 | 102 | def skeleton_resize_single(self, joint, bone_length_file): 103 | bone_length_mat = loadmat(bone_length_file) 104 | mean3D = bone_length_mat['mean3D'].T # convert shape to 15 * 3 105 | bones_mean = mean3D - mean3D[self.kinematic_parents, :] 106 | bone_length = np.linalg.norm(bones_mean, axis=1) 107 | 108 | joint = self._skeleton_resize(joint, bone_length) 109 | return joint 110 | 111 | def skeleton_resize_standard_skeleton(self, joint_input, joint_standard): 112 | """ 113 | 114 | :param joint_input: input joint shape: 15 * 3 115 | :param joint_standard: standard joint shape: 15 * 3 116 | :return: 117 | """ 118 | bones_mean = joint_standard - joint_standard[self.kinematic_parents, :] 119 | bone_length = np.linalg.norm(bones_mean, axis=1) * 1000. 120 | 121 | joint = self._skeleton_resize(joint_input, bone_length) 122 | return joint 123 | 124 | def _skeleton_resize(self, points_3d, bone_length): 125 | # resize the skeleton to the normal size (why we should do that?) 
126 | estimated_bone_vec = points_3d - points_3d[self.kinematic_parents, :] 127 | estimated_bone_length = np.linalg.norm(estimated_bone_vec, axis=1) 128 | multi = bone_length[1:] / estimated_bone_length[1:] 129 | multi = np.concatenate(([0], multi)) 130 | multi = np.stack([multi] * 3, axis=1) 131 | resized_bones_vec = estimated_bone_vec * multi / 1000 132 | 133 | joints_rescaled = points_3d 134 | for i in range(joints_rescaled.shape[0]): 135 | joints_rescaled[i, :] = joints_rescaled[self.kinematic_parents[i], :] + resized_bones_vec[i, :] 136 | return joints_rescaled 137 | 138 | def render(self): 139 | mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=1) 140 | open3d.visualization.draw_geometries([self.skeleton_mesh, mesh_frame]) 141 | 142 | def skeleton_to_mesh(self): 143 | final_mesh = open3d.geometry.TriangleMesh() 144 | for i in range(len(self.skeleton)): 145 | keypoint_mesh = get_sphere(position=self.skeleton[i], radius=0.02) 146 | final_mesh = final_mesh + keypoint_mesh 147 | 148 | for line in self.lines: 149 | line_start_i = line[0] 150 | line_end_i = line[1] 151 | 152 | start_point = self.skeleton[line_start_i] 153 | end_point = self.skeleton[line_end_i] 154 | 155 | line_mesh = get_cylinder(start_point, end_point, radius=0.005) 156 | final_mesh += line_mesh 157 | self.skeleton_mesh = final_mesh 158 | return final_mesh 159 | 160 | def smooth(self, pose_sequence, sigma): 161 | """ 162 | gaussian smooth pose 163 | :param pose_sequence_2d: pose sequence, input is a list with every element is 15 * 2 body pose 164 | :param kernel_size: kernel size of guassian smooth 165 | :return: smoothed 2d pose 166 | """ 167 | pose_sequence = np.asarray(pose_sequence) 168 | pose_sequence_result = np.zeros_like(pose_sequence) 169 | keypoint_num = pose_sequence.shape[1] 170 | for i in range(keypoint_num): 171 | pose_sequence_i = pose_sequence[:, i, :] 172 | pose_sequence_filtered = gaussian_filter1d(pose_sequence_i, sigma, axis=0) 173 | pose_sequence_result[:, i, :] = pose_sequence_filtered 174 | return pose_sequence_result 175 | 176 | def get_max_preds(self, batch_heatmaps): 177 | ''' 178 | get predictions from score maps 179 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 180 | ''' 181 | assert isinstance(batch_heatmaps, np.ndarray), \ 182 | 'batch_heatmaps should be numpy.ndarray' 183 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 184 | 185 | batch_size = batch_heatmaps.shape[0] 186 | num_joints = batch_heatmaps.shape[1] 187 | width = batch_heatmaps.shape[3] 188 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 189 | idx = np.argmax(heatmaps_reshaped, 2) 190 | maxvals = np.amax(heatmaps_reshaped, 2) 191 | 192 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 193 | idx = idx.reshape((batch_size, num_joints, 1)) 194 | 195 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 196 | 197 | preds[:, :, 0] = (preds[:, :, 0]) % width 198 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 199 | 200 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 201 | pred_mask = pred_mask.astype(np.float32) 202 | 203 | preds *= pred_mask 204 | return preds, maxvals 205 | 206 | 207 | if __name__ == '__main__': 208 | skeleton = Skeleton( 209 | calibration_path='/home/wangjian/Develop/egocentricvisualization/pose/fisheye/fisheye.calibration.json') 210 | data_path = r'/home/wangjian/Develop/egocentricvisualization/data_2' 211 | heatmap_dir = os.path.join(data_path, 'heatmaps') 212 | depth_dir = os.path.join(data_path, 
'depths') 213 | out_dir = os.path.join(data_path, 'smooth_skeleton_mesh') 214 | if not os.path.isdir(out_dir): 215 | os.mkdir(out_dir) 216 | skeleon_list = [] 217 | out_path_list = [] 218 | for heatmap_name in tqdm(sorted(os.listdir(heatmap_dir))): 219 | heatmap_path = os.path.join(heatmap_dir, heatmap_name) 220 | mat_id = heatmap_name 221 | depth_path = os.path.join(depth_dir, mat_id) 222 | 223 | skeleton_array = skeleton.set_skeleton_from_file(heatmap_path, 224 | depth_path, 225 | # bone_length_file=r'/home/wangjian/Develop/egocentricvisualization/pose/fisheye/mean3D.mat', 226 | to_mesh=False) 227 | out_path = os.path.join(out_dir, mat_id + ".ply") 228 | skeleon_list.append(skeleton_array) 229 | out_path_list.append(out_path) 230 | 231 | smoothed_skeleton = skeleton.smooth(skeleon_list, sigma=1) 232 | print("saving to ply") 233 | for i in tqdm(range(len(smoothed_skeleton))): 234 | skeleton.skeleton = smoothed_skeleton[i] 235 | skeleton.skeleton_to_mesh() 236 | skeleton.save_skeleton_mesh(out_path_list[i]) 237 | 238 | # skeleton.set_skeleton_from_file(r'X:\Mo2Cap2Plus\static00\Datasets\Mo2Cap2\ego_system_test\sitting\heatmaps\img-04052020001910-937.mat', 239 | # r'X:\Mo2Cap2Plus\static00\Datasets\Mo2Cap2\ego_system_test\sitting\depths\img-04052020001910-937.mat', 240 | # # bone_length_file=r'F:\Develop\EgocentricSystemVisualization\pose\fisheye\mean3D.mat') 241 | # ) 242 | # 243 | # skeleton.render() 244 | # print(skeleton.skeleton) 245 | --------------------------------------------------------------------------------
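Not part of the repository: a minimal usage sketch for the npybvh parser above. It assumes the MakeDataForOptimization directory is on PYTHONPATH and that a BVH file exists at the hypothetical path "example.bvh"; the joint name "Head" is only an example and depends on the skeleton stored in the file. The joint axis of the returned arrays follows anim.joint_names(), which is what the indexing relies on.

import numpy as np
from bvh_reader.npybvh.bvh import Bvh

anim = Bvh()
anim.parse_file("example.bvh")

# all_frame_poses() returns positions and rotations, each of shape (frames, joints, 3);
# the joint axis is ordered exactly like anim.joint_names().
positions, rotations = anim.all_frame_poses()
joint_order = list(anim.joint_names())

# World-space trajectory of a single joint over the whole clip.
joint_name = "Head" if "Head" in joint_order else joint_order[0]
joint_index = joint_order.index(joint_name)
trajectory = positions[:, joint_index, :]
print(joint_name, trajectory.shape)  # (frames, 3)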
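Not part of the repository: a round-trip sketch for the pose-transform helpers in utils/utils.py, assuming the repository root is on PYTHONPATH and its dependencies (numpy, scipy, torch, cv2) are installed. Camera poses are taken to be 4x4 camera-to-world matrices, which is what get_relative_global_pose_with_camera_matrix and get_global_pose_from_relative_global_pose operate on; the poses and camera trajectory below are synthetic placeholders.

import numpy as np
from scipy.spatial.transform import Rotation

from utils.utils import (
    get_relative_global_pose_with_camera_matrix,
    get_global_pose_from_relative_global_pose,
)

def random_camera_matrix(rng):
    # Random rigid 4x4 camera-to-world transform.
    mat = np.eye(4)
    mat[:3, :3] = Rotation.from_euler('xyz', rng.uniform(-np.pi, np.pi, size=3)).as_matrix()
    mat[:3, 3] = rng.uniform(-1.0, 1.0, size=3)
    return mat

rng = np.random.default_rng(0)
seq_len, n_joints = 10, 15

local_poses = rng.normal(size=(seq_len, n_joints, 3))                  # camera-space poses
camera_poses = np.stack([random_camera_matrix(rng) for _ in range(seq_len)])

# Express every frame of the window relative to the first camera of the window ...
relative_global = get_relative_global_pose_with_camera_matrix(local_poses, camera_poses)

# ... then lift the whole window back into world space with the first camera matrix.
global_poses = get_global_pose_from_relative_global_pose(relative_global, camera_poses[0])

# Frame 0 is simply the local pose pushed through its own camera-to-world matrix.
expected_frame0 = (camera_poses[0] @ np.concatenate(
    [local_poses[0], np.ones((n_joints, 1))], axis=1).T).T[:, :3]
print(np.allclose(global_poses[0], expected_frame0))  # expected: True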
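Not part of the repository: a small synthetic check of the bone-length rescaling in utils/skeleton.py. It assumes the repository root is on PYTHONPATH and that the modules imported by utils.skeleton (open3d and the fisheye camera models) are importable; calibration_path=None selects the equisolid fisheye fallback, so no calibration file is needed just to exercise the rescaling helper.

import numpy as np
from utils.skeleton import Skeleton

skeleton = Skeleton(calibration_path=None)   # equisolid fallback, no calibration file required

rng = np.random.default_rng(0)
predicted = rng.normal(size=(15, 3))   # synthetic 15-joint pose, metres
standard = rng.normal(size=(15, 3))    # reference pose defining the target bone lengths

rescaled = skeleton.skeleton_resize_standard_skeleton(predicted.copy(), standard)

def bone_lengths(joints):
    # Distance from every joint to its kinematic parent (index 0 is the root, length 0).
    return np.linalg.norm(joints - joints[Skeleton.kinematic_parents], axis=1)

# Rescaling keeps the predicted bone directions but adopts the standard bone lengths.
print(np.allclose(bone_lengths(rescaled)[1:], bone_lengths(standard)[1:]))  # expected: True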