├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── aist_plusplus ├── __init__.py ├── features │ ├── __init__.py │ ├── kinetic.py │ ├── manual.py │ └── utils.py ├── loader.py ├── utils.py └── visualizer.py ├── assets └── aist_pipeline.jpg ├── demos ├── extract_motion_feats.py ├── run_dyn_processing.py ├── run_openpose_pipeline.sh └── run_vis.py ├── downloader.py ├── processing ├── requirements.txt ├── run_estimate_camera.py ├── run_estimate_keypoints.py ├── run_estimate_smpl.py ├── run_openpose.py ├── run_preprocessing.py └── run_segmentation.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | # lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | data/ 107 | 108 | .DS_Store -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution, 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. 
Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AIST++ API 2 | 3 | This repo contains starter code for using the AIST++ dataset. To download the 4 | dataset or explore details of this dataset, please go to our dataset [website](https://google.github.io/aistplusplus_dataset). 5 | 6 | ## Installation 7 | The code has been tested on `python>=3.7`. You can install the dependencies and this repo by: 8 | ``` bash 9 | pip install -r requirements.txt 10 | python setup.py install 11 | ``` 12 | You also need to make sure [ffmpeg](https://ffmpeg.org/download.html) is installed on your machine, if you would like to visualize the annotations using this api. 13 | 14 | ## How to use 15 | We provide demo code for loading and visualizing AIST++ annotations. 16 | Note [AIST++ annotations and 17 | videos](https://google.github.io/aistplusplus_dataset/download.html), 18 | as well as the [SMPL model](https://smpl.is.tue.mpg.de/en) (for SMPL visualization only) are required to run the demo code. 19 | 20 | The directory structure of the data is expected to be: 21 | ``` 22 | 23 | ├── motions/ 24 | ├── keypoints2d/ 25 | ├── keypoints3d/ 26 | ├── splits/ 27 | ├── cameras/ 28 | └── ignore_list.txt 29 | 30 | 31 | └── *.mp4 32 | 33 | 34 | ├── SMPL_MALE.pkl 35 | └── SMPL_FEMALE.pkl 36 | ``` 37 | 38 | #### Visualize 2D keypoints annotation 39 | The command below will plot 2D keypoints onto the raw video and save it to the 40 | directory `./visualization/`. 41 | ``` bash 42 | python demos/run_vis.py \ 43 | --anno_dir \ 44 | --video_dir \ 45 | --save_dir ./visualization/ \ 46 | --video_name gWA_sFM_c01_d27_mWA2_ch21 \ 47 | --mode 2D 48 | ``` 49 | 50 | #### Visualize 3D keypoints annotation 51 | The command below will project 3D keypoints onto the raw video using camera parameters, and save it to the 52 | directory `./visualization/`. 
53 | ``` bash 54 | python demos/run_vis.py \ 55 | --anno_dir \ 56 | --video_dir \ 57 | --save_dir ./visualization/ \ 58 | --video_name gWA_sFM_c01_d27_mWA2_ch21 \ 59 | --mode 3D 60 | ``` 61 | 62 | #### Visualize the SMPL joints annotation 63 | The command below will first calculate the SMPL joint locations from our motion 64 | annotations (joint rotations and root trajectories), then project them onto the 65 | raw video and plot. The result will be saved into the directory 66 | `./visualization/`. 67 | ``` bash 68 | python demos/run_vis.py \ 69 | --anno_dir \ 70 | --video_dir \ 71 | --smpl_dir \ 72 | --save_dir ./visualization/ \ 73 | --video_name gWA_sFM_c01_d27_mWA2_ch21 \ 74 | --mode SMPL 75 | ``` 76 | 77 | #### Visualize the SMPL Mesh 78 | The command below will calculate the first frame SMPL mesh from our motion 79 | annotations (joint rotations and root trajectories), and visualize in 3D. 80 | ``` bash 81 | # install some additional libraries for 3D mesh visualization 82 | pip install vedo trimesh 83 | 84 | python demos/run_vis.py \ 85 | --anno_dir \ 86 | --smpl_dir \ 87 | --video_name gWA_sFM_c01_d27_mWA2_ch21 \ 88 | --mode SMPLMesh 89 | ``` 90 | 91 | #### Extract SMPL motion features 92 | The command below will calculate and print two types of features for a motion sequence in SMPL format. We take reference from [fairmotion](https://github.com/facebookresearch/fairmotion/tree/master/fairmotion/tasks/clustering) to calculate the features. 93 | ``` bash 94 | python demos/extract_motion_feats.py \ 95 | --anno_dir \ 96 | --smpl_dir \ 97 | --video_name gWA_sFM_c01_d27_mWA2_ch21 98 | ``` 99 | 100 | #### Multi-view 3D keypoints and motion reconstruction 101 | 102 | This repo also provides code we used for constructing this dataset from the 103 | multi-view [AIST Dance Video Database](https://aistdancedb.ongaaccel.jp/). The 104 | construction pipeline starts with frame-by-frame 2D keypoint detection and 105 | manual camera estimation. Then triangulation and bundle adjustment are applied to optimize the 106 | camera parameters as well as the 3D keypoints. Finally we sequentially fit the SMPL model to 3D keypoints to get a motion sequence represented using joint angles and a root trajectory. The following figure shows our pipeline overview. 107 | 108 |
109 | ![AIST++ construction pipeline overview.](assets/aist_pipeline.jpg) 110 | 111 | 
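As a rough illustration of the triangulation step above, the sketch below reconstructs 3D keypoints for one frame from the multi-view 2D keypoints and camera parameters, using this repo's loaders. The annotation directory is a placeholder, and the call to `CameraGroup.triangulate` assumes aniposelib's triangulation API; the full, optimized pipeline lives in `processing/run_estimate_keypoints.py`.
``` python
from aist_plusplus.loader import AISTDataset

anno_dir = '<ANNOTATIONS_DIR>'  # placeholder: local AIST++ annotation folder
seq_name = 'gWA_sFM_cAll_d27_mWA2_ch21'  # sequence for the README's example video

aist_dataset = AISTDataset(anno_dir)

# Multi-view 2D keypoints: (n_views, n_frames, 17, 3), last dim is (x, y, score).
keypoints2d, _, _ = AISTDataset.load_keypoint2d(
    aist_dataset.keypoint2d_dir, seq_name)

# Camera group for the environment this sequence was recorded in.
env_name = aist_dataset.mapping_seq2env[seq_name]
cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name)

# Triangulate frame 0 across all views (assumes aniposelib's CameraGroup.triangulate).
points2d = keypoints2d[:, 0, :, :2]         # (n_views, 17, 2)
keypoints3d = cgroup.triangulate(points2d)  # (17, 3)
print(keypoints3d.shape)
```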
112 | 113 | The annotations in AIST++ are in [COCO-format](https://cocodataset.org/#home) for 2D \& 3D keypoints, and 114 | [SMPL-format](https://smpl.is.tue.mpg.de/) for human motion annotations. It is designed to serve general 115 | research purposes. However, in some cases you might need the data in different format 116 | (e.g., [Openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) / 117 | [Alphapose](https://github.com/MVIG-SJTU/AlphaPose) keypoints format, or [STAR](https://star.is.tue.mpg.de/) human motion 118 | format). **With the code we provide, it should be easy to construct your own 119 | version of AIST++, with your own keypoint detector or human model definition.** 120 | 121 | **Step 1.** Assume you have your own 2D keypoint detection results stored in ``, you can start by preprocessing the keypoints into the `.pkl` format that we support. The code we used at this step is as follows but you might need to modify the script `run_preprocessing.py` in order to be compatible with your own data. 122 | ``` bash 123 | python processing/run_preprocessing.py \ 124 | --keypoints_dir \ 125 | --save_dir /keypoints2d/ 126 | ``` 127 | 128 | **Step 2.** Then you can estimate the camera parameters using your 2D keypoints. This step 129 | is optional as you can still use our camera parameter estimates which are 130 | quite accurate. At this step, you will need the `/cameras/mapping.txt` file which stores the mapping from videos to different environment settings. 131 | ``` bash 132 | # install some additional libraries 133 | pip install -r processing/requirements.txt 134 | 135 | # If you would like to estimate your own camera parameters: 136 | python processing/run_estimate_camera.py \ 137 | --anno_dir \ 138 | --save_dir /cameras/ 139 | # Or you can skip this step by just using our camera parameter estimates. 140 | ``` 141 | 142 | **Step 3.** Next step is to perform 3D keypoints reconstruction from multi-view 2D keypoints 143 | and camera parameters. You can just run: 144 | ``` bash 145 | python processing/run_estimate_keypoints.py \ 146 | --anno_dir \ 147 | --save_dir /keypoints3d/ 148 | ``` 149 | 150 | **Step 4.** Finally we can estimate SMPL-format human motion data by fitting 151 | the 3D keypoints to the SMPL model. If you would like to use another human model such 152 | as [STAR](https://star.is.tue.mpg.de/), you will need to do some modifications in the script 153 | `run_estimate_smpl.py`. The following command runs SMPL fitting. 154 | ``` bash 155 | python processing/run_estimate_smpl.py \ 156 | --anno_dir \ 157 | --smpl_dir \ 158 | --save_dir /motions/ 159 | ``` 160 | Note that this step will take several days to process the entire dataset if your machine has only one GPU. 161 | In practise, we run this step on a cluster, but are only able to provide the single-threaded version. 162 | 163 | #### MISC. 
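- Loading annotations programmatically: a minimal sketch using this repo's `AISTDataset` loader (the annotation directory is a placeholder):
``` python
from aist_plusplus.loader import AISTDataset

aist_dataset = AISTDataset('<ANNOTATIONS_DIR>')  # placeholder: local AIST++ annotation folder

# Map an AIST video name to its AIST++ sequence name and camera view.
seq_name, view = AISTDataset.get_seq_name('gWA_sFM_c01_d27_mWA2_ch21')

# SMPL-format motion: joint rotations (N, 24, 3), scaling (1,), root translations (N, 3).
smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion(
    aist_dataset.motion_dir, seq_name)

# COCO-format 3D keypoints (N, 17, 3); use_optim=True returns the bundle-adjusted version.
keypoints3d = AISTDataset.load_keypoint3d(
    aist_dataset.keypoint3d_dir, seq_name, use_optim=True)
```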
164 | - COCO-format keypoint definition: 165 | ``` 166 | [ 167 | "nose", 168 | "left_eye", "right_eye", "left_ear", "right_ear", "left_shoulder","right_shoulder", 169 | "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hip", "right_hip", 170 | "left_knee", "right_knee", "left_ankle", "right_ankle" 171 | ] 172 | ``` 173 | 174 | - SMPL-format body joint definition: 175 | ``` 176 | [ 177 | "root", 178 | "lhip", "rhip", "belly", 179 | "lknee", "rknee", "spine", 180 | "lankle", "rankle", "chest", 181 | "ltoes", "rtoes", "neck", 182 | "linshoulder", "rinshoulder", 183 | "head", "lshoulder", "rshoulder", 184 | "lelbow", "relbow", 185 | "lwrist", "rwrist", 186 | "lhand", "rhand", 187 | ] 188 | ``` 189 | -------------------------------------------------------------------------------- /aist_plusplus/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/aistplusplus_api/2dd7b3e946b794fd0081c98e2e2433545abf8b87/aist_plusplus/__init__.py -------------------------------------------------------------------------------- /aist_plusplus/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/aistplusplus_api/2dd7b3e946b794fd0081c98e2e2433545abf8b87/aist_plusplus/features/__init__.py -------------------------------------------------------------------------------- /aist_plusplus/features/kinetic.py: -------------------------------------------------------------------------------- 1 | # BSD License 2 | 3 | # For fairmotion software 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. 6 | # Modified by Ruilong Li 7 | 8 | # Redistribution and use in source and binary forms, with or without modification, 9 | # are permitted provided that the following conditions are met: 10 | 11 | # * Redistributions of source code must retain the above copyright notice, this 12 | # list of conditions and the following disclaimer. 13 | 14 | # * Redistributions in binary form must reproduce the above copyright notice, 15 | # this list of conditions and the following disclaimer in the documentation 16 | # and/or other materials provided with the distribution. 17 | 18 | # * Neither the name Facebook nor the names of its contributors may be used to 19 | # endorse or promote products derived from this software without specific 20 | # prior written permission. 21 | 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 23 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 24 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 26 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 29 | # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | import numpy as np 33 | from . 
import utils as feat_utils 34 | 35 | 36 | def extract_kinetic_features(positions): 37 | assert len(positions.shape) == 3 # (seq_len, n_joints, 3) 38 | features = KineticFeatures(positions) 39 | kinetic_feature_vector = [] 40 | for i in range(positions.shape[1]): 41 | feature_vector = np.hstack( 42 | [ 43 | features.average_kinetic_energy_horizontal(i), 44 | features.average_kinetic_energy_vertical(i), 45 | features.average_energy_expenditure(i), 46 | ] 47 | ) 48 | kinetic_feature_vector.extend(feature_vector) 49 | kinetic_feature_vector = np.array(kinetic_feature_vector, dtype=np.float32) 50 | return kinetic_feature_vector 51 | 52 | 53 | class KineticFeatures: 54 | def __init__( 55 | self, positions, frame_time=1./60, up_vec="y", sliding_window=2 56 | ): 57 | self.positions = positions 58 | self.frame_time = frame_time 59 | self.up_vec = up_vec 60 | self.sliding_window = sliding_window 61 | 62 | def average_kinetic_energy(self, joint): 63 | average_kinetic_energy = 0 64 | for i in range(1, len(self.positions)): 65 | average_velocity = feat_utils.calc_average_velocity( 66 | self.positions, i, joint, self.sliding_window, self.frame_time 67 | ) 68 | average_kinetic_energy += average_velocity ** 2 69 | average_kinetic_energy = average_kinetic_energy / ( 70 | len(self.positions) - 1.0 71 | ) 72 | return average_kinetic_energy 73 | 74 | def average_kinetic_energy_horizontal(self, joint): 75 | val = 0 76 | for i in range(1, len(self.positions)): 77 | average_velocity = feat_utils.calc_average_velocity_horizontal( 78 | self.positions, 79 | i, 80 | joint, 81 | self.sliding_window, 82 | self.frame_time, 83 | self.up_vec, 84 | ) 85 | val += average_velocity ** 2 86 | val = val / (len(self.positions) - 1.0) 87 | return val 88 | 89 | def average_kinetic_energy_vertical(self, joint): 90 | val = 0 91 | for i in range(1, len(self.positions)): 92 | average_velocity = feat_utils.calc_average_velocity_vertical( 93 | self.positions, 94 | i, 95 | joint, 96 | self.sliding_window, 97 | self.frame_time, 98 | self.up_vec, 99 | ) 100 | val += average_velocity ** 2 101 | val = val / (len(self.positions) - 1.0) 102 | return val 103 | 104 | def average_energy_expenditure(self, joint): 105 | val = 0.0 106 | for i in range(1, len(self.positions)): 107 | val += feat_utils.calc_average_acceleration( 108 | self.positions, i, joint, self.sliding_window, self.frame_time 109 | ) 110 | val = val / (len(self.positions) - 1.0) 111 | return val 112 | -------------------------------------------------------------------------------- /aist_plusplus/features/manual.py: -------------------------------------------------------------------------------- 1 | # BSD License 2 | 3 | # For fairmotion software 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. 6 | # Modified by Ruilong Li 7 | 8 | # Redistribution and use in source and binary forms, with or without modification, 9 | # are permitted provided that the following conditions are met: 10 | 11 | # * Redistributions of source code must retain the above copyright notice, this 12 | # list of conditions and the following disclaimer. 13 | 14 | # * Redistributions in binary form must reproduce the above copyright notice, 15 | # this list of conditions and the following disclaimer in the documentation 16 | # and/or other materials provided with the distribution. 17 | 18 | # * Neither the name Facebook nor the names of its contributors may be used to 19 | # endorse or promote products derived from this software without specific 20 | # prior written permission. 
21 | 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 23 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 24 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 26 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 29 | # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | import numpy as np 33 | from . import utils as feat_utils 34 | 35 | 36 | SMPL_JOINT_NAMES = [ 37 | "root", 38 | "lhip", "rhip", "belly", 39 | "lknee", "rknee", "spine", 40 | "lankle", "rankle", "chest", 41 | "ltoes", "rtoes", "neck", 42 | "linshoulder", "rinshoulder", 43 | "head", "lshoulder", "rshoulder", 44 | "lelbow", "relbow", 45 | "lwrist", "rwrist", 46 | "lhand", "rhand", 47 | ] 48 | 49 | 50 | def extract_manual_features(positions): 51 | assert len(positions.shape) == 3 # (seq_len, n_joints, 3) 52 | features = [] 53 | f = ManualFeatures(positions) 54 | for _ in range(1, positions.shape[0]): 55 | pose_features = [] 56 | pose_features.append( 57 | f.f_nmove("neck", "rhip", "lhip", "rwrist", 1.8 * f.hl) 58 | ) 59 | pose_features.append( 60 | f.f_nmove("neck", "lhip", "rhip", "lwrist", 1.8 * f.hl) 61 | ) 62 | pose_features.append( 63 | f.f_nplane("chest", "neck", "neck", "rwrist", 0.2 * f.hl) 64 | ) 65 | pose_features.append( 66 | f.f_nplane("chest", "neck", "neck", "lwrist", 0.2 * f.hl) 67 | ) 68 | pose_features.append( 69 | f.f_move("belly", "chest", "chest", "rwrist", 1.8 * f.hl) 70 | ) 71 | pose_features.append( 72 | f.f_move("belly", "chest", "chest", "lwrist", 1.8 * f.hl) 73 | ) 74 | pose_features.append( 75 | f.f_angle("relbow", "rshoulder", "relbow", "rwrist", [0, 110]) 76 | ) 77 | pose_features.append( 78 | f.f_angle("lelbow", "lshoulder", "lelbow", "lwrist", [0, 110]) 79 | ) 80 | pose_features.append( 81 | f.f_nplane( 82 | "lshoulder", "rshoulder", "lwrist", "rwrist", 2.5 * f.sw 83 | ) 84 | ) 85 | pose_features.append( 86 | f.f_move("lwrist", "rwrist", "rwrist", "lwrist", 1.4 * f.hl) 87 | ) 88 | pose_features.append( 89 | f.f_move("rwrist", "root", "lwrist", "root", 1.4 * f.hl) 90 | ) 91 | pose_features.append( 92 | f.f_move("lwrist", "root", "rwrist", "root", 1.4 * f.hl) 93 | ) 94 | pose_features.append(f.f_fast("rwrist", 2.5 * f.hl)) 95 | pose_features.append(f.f_fast("lwrist", 2.5 * f.hl)) 96 | pose_features.append( 97 | f.f_plane("root", "lhip", "ltoes", "rankle", 0.38 * f.hl) 98 | ) 99 | pose_features.append( 100 | f.f_plane("root", "rhip", "rtoes", "lankle", 0.38 * f.hl) 101 | ) 102 | pose_features.append( 103 | f.f_nplane("zero", "y_unit", "y_min", "rankle", 1.2 * f.hl) 104 | ) 105 | pose_features.append( 106 | f.f_nplane("zero", "y_unit", "y_min", "lankle", 1.2 * f.hl) 107 | ) 108 | pose_features.append( 109 | f.f_nplane("lhip", "rhip", "lankle", "rankle", 2.1 * f.hw) 110 | ) 111 | pose_features.append( 112 | f.f_angle("rknee", "rhip", "rknee", "rankle", [0, 110]) 113 | ) 114 | pose_features.append( 115 | f.f_angle("lknee", "lhip", "lknee", "lankle", [0, 110]) 116 | ) 117 | pose_features.append(f.f_fast("rankle", 2.5 * f.hl)) 118 | 
pose_features.append(f.f_fast("lankle", 2.5 * f.hl)) 119 | pose_features.append( 120 | f.f_angle("neck", "root", "rshoulder", "relbow", [25, 180]) 121 | ) 122 | pose_features.append( 123 | f.f_angle("neck", "root", "lshoulder", "lelbow", [25, 180]) 124 | ) 125 | pose_features.append( 126 | f.f_angle("neck", "root", "rhip", "rknee", [50, 180]) 127 | ) 128 | pose_features.append( 129 | f.f_angle("neck", "root", "lhip", "lknee", [50, 180]) 130 | ) 131 | pose_features.append( 132 | f.f_plane("rankle", "neck", "lankle", "root", 0.5 * f.hl) 133 | ) 134 | pose_features.append( 135 | f.f_angle("neck", "root", "zero", "y_unit", [70, 110]) 136 | ) 137 | pose_features.append( 138 | f.f_nplane("zero", "minus_y_unit", "y_min", "rwrist", -1.2 * f.hl) 139 | ) 140 | pose_features.append( 141 | f.f_nplane("zero", "minus_y_unit", "y_min", "lwrist", -1.2 * f.hl) 142 | ) 143 | pose_features.append(f.f_fast("root", 2.3 * f.hl)) 144 | features.append(pose_features) 145 | f.next_frame() 146 | features = np.array(features, dtype=np.float32).mean(axis=0) 147 | return features 148 | 149 | 150 | class ManualFeatures: 151 | def __init__(self, positions, joint_names=SMPL_JOINT_NAMES): 152 | self.positions = positions 153 | self.joint_names = joint_names 154 | self.frame_num = 1 155 | 156 | # humerus length 157 | self.hl = feat_utils.distance_between_points( 158 | [1.99113488e-01, 2.36807942e-01, -1.80702247e-02], # "lshoulder", 159 | [4.54445392e-01, 2.21158922e-01, -4.10167128e-02], # "lelbow" 160 | ) 161 | # shoulder width 162 | self.sw = feat_utils.distance_between_points( 163 | [1.99113488e-01, 2.36807942e-01, -1.80702247e-02], # "lshoulder" 164 | [-1.91692337e-01, 2.36928746e-01, -1.23055102e-02,], # "rshoulder" 165 | ) 166 | # hip width 167 | self.hw = feat_utils.distance_between_points( 168 | [5.64076714e-02, -3.23069185e-01, 1.09197125e-02], # "lhip" 169 | [-6.24834076e-02, -3.31302464e-01, 1.50412619e-02], # "rhip" 170 | ) 171 | 172 | def next_frame(self): 173 | self.frame_num += 1 174 | 175 | def transform_and_fetch_position(self, j): 176 | if j == "y_unit": 177 | return [0, 1, 0] 178 | elif j == "minus_y_unit": 179 | return [0, -1, 0] 180 | elif j == "zero": 181 | return [0, 0, 0] 182 | elif j == "y_min": 183 | return [ 184 | 0, 185 | min( 186 | [y for (_, y, _) in self.positions[self.frame_num]] 187 | ), 188 | 0, 189 | ] 190 | return self.positions[self.frame_num][ 191 | self.joint_names.index(j) 192 | ] 193 | 194 | def transform_and_fetch_prev_position(self, j): 195 | return self.positions[self.frame_num - 1][ 196 | self.joint_names.index(j) 197 | ] 198 | 199 | def f_move(self, j1, j2, j3, j4, range): 200 | j1_prev, j2_prev, j3_prev, j4_prev = [ 201 | self.transform_and_fetch_prev_position(j) for j in [j1, j2, j3, j4] 202 | ] 203 | j1, j2, j3, j4 = [ 204 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 205 | ] 206 | return feat_utils.velocity_direction_above_threshold( 207 | j1, j1_prev, j2, j2_prev, j3, j3_prev, range 208 | ) 209 | 210 | def f_nmove(self, j1, j2, j3, j4, range): 211 | j1_prev, j2_prev, j3_prev, j4_prev = [ 212 | self.transform_and_fetch_prev_position(j) for j in [j1, j2, j3, j4] 213 | ] 214 | j1, j2, j3, j4 = [ 215 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 216 | ] 217 | return feat_utils.velocity_direction_above_threshold_normal( 218 | j1, j1_prev, j2, j3, j4, j4_prev, range 219 | ) 220 | 221 | def f_plane(self, j1, j2, j3, j4, threshold): 222 | j1, j2, j3, j4 = [ 223 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 224 | ] 225 | 
return feat_utils.distance_from_plane(j1, j2, j3, j4, threshold) 226 | 227 | def f_nplane(self, j1, j2, j3, j4, threshold): 228 | j1, j2, j3, j4 = [ 229 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 230 | ] 231 | return feat_utils.distance_from_plane_normal(j1, j2, j3, j4, threshold) 232 | 233 | def f_angle(self, j1, j2, j3, j4, range): 234 | j1, j2, j3, j4 = [ 235 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 236 | ] 237 | return feat_utils.angle_within_range(j1, j2, j3, j4, range) 238 | 239 | def f_fast(self, j1, threshold): 240 | j1_prev = self.transform_and_fetch_prev_position(j1) 241 | j1 = self.transform_and_fetch_position(j1) 242 | return feat_utils.velocity_above_threshold(j1, j1_prev, threshold) 243 | -------------------------------------------------------------------------------- /aist_plusplus/features/utils.py: -------------------------------------------------------------------------------- 1 | # BSD License 2 | 3 | # For fairmotion software 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. 6 | 7 | # Redistribution and use in source and binary forms, with or without modification, 8 | # are permitted provided that the following conditions are met: 9 | 10 | # * Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | 13 | # * Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | 17 | # * Neither the name Facebook nor the names of its contributors may be used to 18 | # endorse or promote products derived from this software without specific 19 | # prior written permission. 20 | 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | import numpy as np 32 | 33 | 34 | def distance_between_points(a, b): 35 | return np.linalg.norm(np.array(a) - np.array(b)) 36 | 37 | 38 | def distance_from_plane(a, b, c, p, threshold): 39 | ba = np.array(b) - np.array(a) 40 | ca = np.array(c) - np.array(a) 41 | cross = np.cross(ca, ba) 42 | 43 | pa = np.array(p) - np.array(a) 44 | return np.dot(cross, pa) / np.linalg.norm(cross) > threshold 45 | 46 | 47 | def distance_from_plane_normal(n1, n2, a, p, threshold): 48 | normal = np.array(n2) - np.array(n1) 49 | pa = np.array(p) - np.array(a) 50 | return np.dot(normal, pa) / np.linalg.norm(normal) > threshold 51 | 52 | 53 | def angle_within_range(j1, j2, k1, k2, range): 54 | j = np.array(j2) - np.array(j1) 55 | k = np.array(k2) - np.array(k1) 56 | 57 | angle = np.arccos(np.dot(j, k) / (np.linalg.norm(j) * np.linalg.norm(k))) 58 | angle = np.degrees(angle) 59 | 60 | if angle > range[0] and angle < range[1]: 61 | return True 62 | else: 63 | return False 64 | 65 | 66 | def velocity_direction_above_threshold( 67 | j1, j1_prev, j2, j2_prev, p, p_prev, threshold, time_per_frame=1 / 120 68 | ): 69 | velocity = ( 70 | np.array(p) - np.array(j1) - (np.array(p_prev) - np.array(j1_prev)) 71 | ) 72 | direction = np.array(j2) - np.array(j1) 73 | 74 | velocity_along_direction = np.dot(velocity, direction) / np.linalg.norm( 75 | direction 76 | ) 77 | velocity_along_direction = velocity_along_direction / time_per_frame 78 | return velocity_along_direction > threshold 79 | 80 | 81 | def velocity_direction_above_threshold_normal( 82 | j1, j1_prev, j2, j3, p, p_prev, threshold, time_per_frame=1 / 120 83 | ): 84 | velocity = ( 85 | np.array(p) - np.array(j1) - (np.array(p_prev) - np.array(j1_prev)) 86 | ) 87 | j31 = np.array(j3) - np.array(j1) 88 | j21 = np.array(j2) - np.array(j1) 89 | direction = np.cross(j31, j21) 90 | 91 | velocity_along_direction = np.dot(velocity, direction) / np.linalg.norm( 92 | direction 93 | ) 94 | velocity_along_direction = velocity_along_direction / time_per_frame 95 | return velocity_along_direction > threshold 96 | 97 | 98 | def velocity_above_threshold(p, p_prev, threshold, time_per_frame=1 / 120): 99 | velocity = np.linalg.norm(np.array(p) - np.array(p_prev)) / time_per_frame 100 | return velocity > threshold 101 | 102 | 103 | def calc_average_velocity(positions, i, joint_idx, sliding_window, frame_time): 104 | current_window = 0 105 | average_velocity = np.zeros(len(positions[0][joint_idx])) 106 | for j in range(-sliding_window, sliding_window + 1): 107 | if i + j - 1 < 0 or i + j >= len(positions): 108 | continue 109 | average_velocity += ( 110 | positions[i + j][joint_idx] - positions[i + j - 1][joint_idx] 111 | ) 112 | current_window += 1 113 | return np.linalg.norm(average_velocity / (current_window * frame_time)) 114 | 115 | 116 | def calc_average_acceleration( 117 | positions, i, joint_idx, sliding_window, frame_time 118 | ): 119 | current_window = 0 120 | average_acceleration = np.zeros(len(positions[0][joint_idx])) 121 | for j in range(-sliding_window, sliding_window + 1): 122 | if i + j - 1 < 0 or i + j + 1 >= len(positions): 123 | continue 124 | v2 = ( 125 | positions[i + j + 1][joint_idx] - positions[i + j][joint_idx] 126 | ) / frame_time 127 | v1 = ( 128 | positions[i + j][joint_idx] 129 | - positions[i + j - 1][joint_idx] / frame_time 130 | ) 131 | average_acceleration += (v2 - v1) / frame_time 132 | current_window += 1 133 | return np.linalg.norm(average_acceleration / current_window) 134 | 135 | 136 | def calc_average_velocity_horizontal( 137 | positions, i, 
joint_idx, sliding_window, frame_time, up_vec="z" 138 | ): 139 | current_window = 0 140 | average_velocity = np.zeros(len(positions[0][joint_idx])) 141 | for j in range(-sliding_window, sliding_window + 1): 142 | if i + j - 1 < 0 or i + j >= len(positions): 143 | continue 144 | average_velocity += ( 145 | positions[i + j][joint_idx] - positions[i + j - 1][joint_idx] 146 | ) 147 | current_window += 1 148 | if up_vec == "y": 149 | average_velocity = np.array( 150 | [average_velocity[0], average_velocity[2]] 151 | ) / (current_window * frame_time) 152 | elif up_vec == "z": 153 | average_velocity = np.array( 154 | [average_velocity[0], average_velocity[1]] 155 | ) / (current_window * frame_time) 156 | else: 157 | raise NotImplementedError 158 | return np.linalg.norm(average_velocity) 159 | 160 | 161 | def calc_average_velocity_vertical( 162 | positions, i, joint_idx, sliding_window, frame_time, up_vec 163 | ): 164 | current_window = 0 165 | average_velocity = np.zeros(len(positions[0][joint_idx])) 166 | for j in range(-sliding_window, sliding_window + 1): 167 | if i + j - 1 < 0 or i + j >= len(positions): 168 | continue 169 | average_velocity += ( 170 | positions[i + j][joint_idx] - positions[i + j - 1][joint_idx] 171 | ) 172 | current_window += 1 173 | if up_vec == "y": 174 | average_velocity = np.array([average_velocity[1]]) / ( 175 | current_window * frame_time 176 | ) 177 | elif up_vec == "z": 178 | average_velocity = np.array([average_velocity[2]]) / ( 179 | current_window * frame_time 180 | ) 181 | else: 182 | raise NotImplementedError 183 | return np.linalg.norm(average_velocity) -------------------------------------------------------------------------------- /aist_plusplus/loader.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """AIST++ Dataset Loader.""" 16 | import json 17 | import os 18 | import pickle 19 | 20 | import aniposelib 21 | import numpy as np 22 | import cv2 23 | 24 | 25 | class AISTDataset: 26 | """A dataset class for loading, processing and plotting AIST++.""" 27 | 28 | VIEWS = ['c01', 'c02', 'c03', 'c04', 'c05', 'c06', 'c07', 'c08', 'c09'] 29 | 30 | def __init__(self, anno_dir): 31 | assert os.path.exists(anno_dir), f'Data does not exist at {anno_dir}!' 
32 | 33 | # Init paths 34 | self.camera_dir = os.path.join(anno_dir, 'cameras/') 35 | self.motion_dir = os.path.join(anno_dir, 'motions/') 36 | self.keypoint3d_dir = os.path.join(anno_dir, 'keypoints3d/') 37 | self.keypoint2d_dir = os.path.join(anno_dir, 'keypoints2d/') 38 | self.filter_file = os.path.join(anno_dir, 'ignore_list.txt') 39 | 40 | # Load environment setting mapping 41 | self.mapping_seq2env = {} # sequence name -> env name 42 | self.mapping_env2seq = {} # env name -> a list of sequence names 43 | env_mapping_file = os.path.join(self.camera_dir, 'mapping.txt') 44 | env_mapping = np.loadtxt(env_mapping_file, dtype=str) 45 | for seq_name, env_name in env_mapping: 46 | self.mapping_seq2env[seq_name] = env_name 47 | if env_name not in self.mapping_env2seq: 48 | self.mapping_env2seq[env_name] = [] 49 | self.mapping_env2seq[env_name].append(seq_name) 50 | 51 | @classmethod 52 | def get_video_name(cls, seq_name, view): 53 | """Get AIST video name from AIST++ sequence name.""" 54 | return seq_name.replace('cAll', view) 55 | 56 | @classmethod 57 | def get_seq_name(cls, video_name): 58 | """Get AIST++ sequence name from AIST video name.""" 59 | tags = video_name.split('_') 60 | if len(tags) == 3: 61 | view = tags[1] 62 | tags[1] = 'cAll' 63 | else: 64 | view = tags[2] 65 | tags[2] = 'cAll' 66 | return '_'.join(tags), view 67 | 68 | @classmethod 69 | def load_camera_group(cls, camera_dir, env_name): 70 | """Load a set of cameras in the environment.""" 71 | file_path = os.path.join(camera_dir, f'{env_name}.json') 72 | assert os.path.exists(file_path), f'File {file_path} does not exist!' 73 | with open(file_path, 'r') as f: 74 | params = json.load(f) 75 | cameras = [] 76 | for param_dict in params: 77 | camera = aniposelib.cameras.Camera(name=param_dict['name'], 78 | size=param_dict['size'], 79 | matrix=param_dict['matrix'], 80 | rvec=param_dict['rotation'], 81 | tvec=param_dict['translation'], 82 | dist=param_dict['distortions']) 83 | cameras.append(camera) 84 | camera_group = aniposelib.cameras.CameraGroup(cameras) 85 | return camera_group 86 | 87 | @classmethod 88 | def load_motion(cls, motion_dir, seq_name): 89 | """Load a motion sequence represented using SMPL format.""" 90 | file_path = os.path.join(motion_dir, f'{seq_name}.pkl') 91 | assert os.path.exists(file_path), f'File {file_path} does not exist!' 92 | with open(file_path, 'rb') as f: 93 | data = pickle.load(f) 94 | smpl_poses = data['smpl_poses'] # (N, 24, 3) 95 | smpl_scaling = data['smpl_scaling'] # (1,) 96 | smpl_trans = data['smpl_trans'] # (N, 3) 97 | return smpl_poses, smpl_scaling, smpl_trans 98 | 99 | @classmethod 100 | def load_keypoint3d(cls, keypoint_dir, seq_name, use_optim=False): 101 | """Load a 3D keypoint sequence represented using COCO format.""" 102 | file_path = os.path.join(keypoint_dir, f'{seq_name}.pkl') 103 | assert os.path.exists(file_path), f'File {file_path} does not exist!' 104 | with open(file_path, 'rb') as f: 105 | data = pickle.load(f) 106 | if use_optim: 107 | return data['keypoints3d_optim'] # (N, 17, 3) 108 | else: 109 | return data['keypoints3d'] # (N, 17, 3) 110 | 111 | @classmethod 112 | def load_keypoint2d(cls, keypoint_dir, seq_name): 113 | """Load a 2D keypoint sequence represented using COCO format.""" 114 | file_path = os.path.join(keypoint_dir, f'{seq_name}.pkl') 115 | assert os.path.exists(file_path), f'File {file_path} does not exist!' 
116 | with open(file_path, 'rb') as f: 117 | data = pickle.load(f) 118 | if 'det_scores' in data: 119 | keypoints2d = data['keypoints2d'] # (nviews, N, 17, 3) 120 | det_scores = data['det_scores'] # (nviews, N) 121 | timestamps = data['timestamps'] # (N,) 122 | return keypoints2d, det_scores, timestamps 123 | else: 124 | keypoints2d = data['keypoints2d'] # (nviews, nframes, (nsubjects, (133, 3))) 125 | bboxes = data['bboxes'] # (nviews, (nframes, (nsubjects, (5,)))) 126 | timestamps = data['timestamps'] # (nviews, (nframes,)) 127 | return keypoints2d, bboxes, timestamps 128 | 129 | @classmethod 130 | def load_frames(cls, video_path, frame_ids=None, fps=60): 131 | """Load a single or multiple frames from a video.""" 132 | if frame_ids is None: 133 | frame_ids = range(1e6) 134 | assert isinstance(frame_ids, list) 135 | if not os.path.exists(video_path): 136 | return None 137 | cap = cv2.VideoCapture(video_path) 138 | assert cap.isOpened(), "check if your opencv is installed with ffmpeg supported." 139 | 140 | images = [] 141 | for frame_id in frame_ids: 142 | sec = frame_id * 1.0 / fps 143 | cap.set(cv2.CAP_PROP_POS_MSEC, (sec * 1000)) 144 | success, image = cap.read() 145 | if not success: 146 | break 147 | images.append(image) 148 | 149 | if len(images) > 0: 150 | images = np.stack(images) 151 | else: 152 | images = None 153 | 154 | cap.release() 155 | return images 156 | -------------------------------------------------------------------------------- /aist_plusplus/utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Utils for AIST++ Dataset.""" 16 | import os 17 | 18 | import ffmpeg 19 | import numpy as np 20 | 21 | 22 | def ffmpeg_video_read(video_path, fps=None): 23 | """Video reader based on FFMPEG. 24 | 25 | This function supports setting fps for video reading. It is critical 26 | as AIST++ Dataset are constructed under exact 60 fps, while some of 27 | the AIST dance videos are not percisely 60 fps. 28 | 29 | Args: 30 | video_path: A video file. 31 | fps: Use specific fps for video reading. (optional) 32 | Returns: 33 | A `np.array` with the shape of [seq_len, height, width, 3] 34 | """ 35 | assert os.path.exists(video_path), f'{video_path} does not exist!' 
36 | try: 37 | probe = ffmpeg.probe(video_path) 38 | except ffmpeg.Error as e: 39 | print('stdout:', e.stdout.decode('utf8')) 40 | print('stderr:', e.stderr.decode('utf8')) 41 | raise e 42 | video_info = next(stream for stream in probe['streams'] 43 | if stream['codec_type'] == 'video') 44 | width = int(video_info['width']) 45 | height = int(video_info['height']) 46 | stream = ffmpeg.input(video_path) 47 | if fps: 48 | stream = ffmpeg.filter(stream, 'fps', fps=fps, round='down') 49 | stream = ffmpeg.output(stream, 'pipe:', format='rawvideo', pix_fmt='rgb24') 50 | out, _ = ffmpeg.run(stream, capture_stdout=True) 51 | out = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3]) 52 | return out.copy() 53 | 54 | 55 | def ffmpeg_video_write(data, video_path, fps=25): 56 | """Video writer based on FFMPEG. 57 | 58 | Args: 59 | data: A `np.array` with the shape of [seq_len, height, width, 3] 60 | video_path: A video file. 61 | fps: Use specific fps for video writing. (optional) 62 | """ 63 | assert len(data.shape) == 4, f'input shape is not valid! Got {data.shape}!' 64 | _, height, width, _ = data.shape 65 | os.makedirs(os.path.dirname(video_path), exist_ok=True) 66 | writer = ( 67 | ffmpeg 68 | .input('pipe:', framerate=fps, format='rawvideo', 69 | pix_fmt='rgb24', s='{}x{}'.format(width, height)) 70 | .output(video_path, pix_fmt='yuv420p') 71 | .overwrite_output() 72 | .run_async(pipe_stdin=True) 73 | ) 74 | for frame in data: 75 | writer.stdin.write(frame.astype(np.uint8).tobytes()) 76 | writer.stdin.close() 77 | 78 | 79 | def ffmpeg_video_to_images(video_path, image_dir, fps=None, ext=".jpg") -> None: 80 | """Video to images converter based on FFMPEG. 81 | 82 | This function supports setting fps for video reading. It is critical 83 | as AIST++ Dataset are constructed under exact 60 fps, while some of 84 | the AIST dance videos are not percisely 60 fps. 85 | 86 | Args: 87 | video_path: A video file. 88 | image_dir: A output directory to store the images. 89 | fps: Use specific fps for video reading. (optional) 90 | """ 91 | assert os.path.exists(video_path), f'{video_path} does not exist!' 92 | os.makedirs(image_dir, exist_ok=True) 93 | stream = ffmpeg.input(video_path) 94 | if fps: 95 | stream = ffmpeg.filter(stream, 'fps', fps=fps, round='down') 96 | stream = ffmpeg.output( 97 | stream, os.path.join(image_dir, '%08d' + ext), start_number=0) 98 | stream = ffmpeg.overwrite_output(stream) 99 | ffmpeg.run(stream, quiet=True) 100 | 101 | 102 | def unify_joint_mappings(dataset='openpose25'): 103 | """Unify different joint definations. 104 | 105 | Output unified defination: 106 | ['Nose', 107 | 'RShoulder', 'RElbow', 'RWrist', 108 | 'LShoulder', 'LElbow', 'LWrist', 109 | 'RHip', 'RKnee', 'RAnkle', 110 | 'LHip', 'LKnee', 'LAnkle', 111 | 'REye', 'LEye', 112 | 'REar', 'LEar', 113 | 'LBigToe', 'LHeel', 114 | 'RBigToe', 'RHeel',] 115 | 116 | Args: 117 | dataset: `openpose25`, `coco`(17) and `smpl`. 118 | Returns: 119 | a list of indexs that maps the joints to a unified defination. 
120 | """ 121 | if dataset == 'openpose25': 122 | return np.array([ 123 | 0, 124 | 2, 3, 4, 125 | 5, 6, 7, 126 | 9, 10, 11, 127 | 12, 13, 14, 128 | 15, 16, 129 | 17, 18, 130 | 19, 21, 131 | 22, 24, 132 | ], dtype=np.int32) 133 | elif dataset == 'smpl': 134 | # note SMPL needs to be "left-right flipped" to be consistent 135 | # with others 136 | return np.array([ 137 | 24, 138 | 16, 18, 20, 139 | 17, 19, 21, 140 | 1, 4, 7, 141 | 2, 5, 8, 142 | 26, 25, 143 | 28, 27, 144 | 32, 34, 145 | 29, 31, 146 | ], dtype=np.int32) 147 | elif dataset == 'coco': 148 | return np.array([ 149 | 0, 150 | 5, 7, 9, 151 | 6, 8, 10, 152 | 11, 13, 15, 153 | 12, 14, 16, 154 | 1, 2, 155 | 3, 4, 156 | ], dtype=np.int32) 157 | else: 158 | raise ValueError(f'{dataset} is not supported') 159 | 160 | -------------------------------------------------------------------------------- /aist_plusplus/visualizer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Visualize the AIST++ Dataset.""" 16 | 17 | from . import utils 18 | import cv2 19 | import numpy as np 20 | 21 | _COLORS = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], 22 | [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], 23 | [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], 24 | [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], 25 | [255, 0, 170], [255, 0, 85]] 26 | 27 | 28 | def plot_kpt(keypoint, canvas, color=None): 29 | for i, (x, y) in enumerate(keypoint[:, 0:2]): 30 | if np.isnan(x) or np.isnan(y) or x < 0 or y < 0: 31 | continue 32 | cv2.circle(canvas, (int(x), int(y)), 33 | 7, 34 | color if color is not None else _COLORS[i % len(_COLORS)], 35 | thickness=-1) 36 | return canvas 37 | 38 | 39 | def plot_on_video(keypoints2d, video_path, save_path, fps=60): 40 | assert len(keypoints2d.shape) == 3, ( 41 | f'Input shape is not valid! Got {keypoints2d.shape}') 42 | video = utils.ffmpeg_video_read(video_path, fps=fps) 43 | for iframe, keypoint in enumerate(keypoints2d): 44 | if iframe >= video.shape[0]: 45 | break 46 | video[iframe] = plot_kpt(keypoint, video[iframe]) 47 | utils.ffmpeg_video_write(video, save_path, fps=fps) 48 | 49 | 50 | -------------------------------------------------------------------------------- /assets/aist_pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/aistplusplus_api/2dd7b3e946b794fd0081c98e2e2433545abf8b87/assets/aist_pipeline.jpg -------------------------------------------------------------------------------- /demos/extract_motion_feats.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Demo code for motion feature extraction.""" 16 | from absl import app 17 | from absl import flags 18 | from aist_plusplus.loader import AISTDataset 19 | from aist_plusplus.features.kinetic import extract_kinetic_features 20 | from aist_plusplus.features.manual import extract_manual_features 21 | from smplx import SMPL 22 | import torch 23 | 24 | 25 | FLAGS = flags.FLAGS 26 | flags.DEFINE_string( 27 | 'anno_dir', 28 | '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/', 29 | 'input local dictionary for AIST++ annotations.') 30 | flags.DEFINE_string( 31 | 'smpl_dir', 32 | '/usr/local/google/home/ruilongli/data/SMPL/', 33 | 'input local dictionary that stores SMPL data.') 34 | flags.DEFINE_string( 35 | 'video_name', 36 | 'gWA_sFM_c01_d27_mWA2_ch21', 37 | 'input video name to be visualized.') 38 | 39 | 40 | def main(_): 41 | # Parsing data info. 42 | aist_dataset = AISTDataset(FLAGS.anno_dir) 43 | seq_name, view = AISTDataset.get_seq_name(FLAGS.video_name) 44 | 45 | # SMPL joints 46 | smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion( 47 | aist_dataset.motion_dir, seq_name) 48 | smpl = SMPL(model_path=FLAGS.smpl_dir, gender='MALE', batch_size=1) 49 | # Note here we calculate `transl` as `smpl_trans/smpl_scaling` for 50 | # normalizing the motion in generic SMPL model scale. 51 | keypoints3d = smpl.forward( 52 | global_orient=torch.from_numpy(smpl_poses[:, 0:1]).float(), 53 | body_pose=torch.from_numpy(smpl_poses[:, 1:]).float(), 54 | transl=torch.from_numpy(smpl_trans / smpl_scaling).float(), 55 | ).joints.detach().numpy() 56 | 57 | # extract features 58 | features_k = extract_kinetic_features(keypoints3d) 59 | print ("kinetic features:", features_k) 60 | features_m = extract_manual_features(keypoints3d) 61 | print ("manual features:", features_m) 62 | 63 | 64 | if __name__ == '__main__': 65 | app.run(main) 66 | 67 | -------------------------------------------------------------------------------- /demos/run_dyn_processing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | import glob 5 | 6 | from absl import app 7 | from absl import flags 8 | from aist_plusplus.loader import AISTDataset 9 | from aist_plusplus.utils import ffmpeg_video_to_images 10 | from smplx import SMPL 11 | import torch 12 | import imageio 13 | import numpy as np 14 | 15 | FLAGS = flags.FLAGS 16 | 17 | flags.DEFINE_list( 18 | 'sequence_names', 19 | "gBR_sBM_cAll_d04_mBR0_ch01", 20 | 'list of sequence names to be processed. 
None means to process all.') 21 | flags.DEFINE_string( 22 | 'anno_dir', 23 | '/home/ruilongli/data/AIST++/', 24 | 'input local dictionary for AIST++ annotations.') 25 | flags.DEFINE_string( 26 | 'smpl_dir', 27 | '/home/ruilongli/data/smpl_model/smpl/', 28 | 'input local dictionary that stores SMPL data.') 29 | flags.DEFINE_string( 30 | 'video_dir', 31 | '/home/ruilongli/data/AIST/videos/10M/', 32 | 'input local dictionary for AIST Dance Videos.') 33 | flags.DEFINE_string( 34 | 'video_alpha_dir', 35 | '/home/ruilongli/data/AIST++/segmentation/', 36 | 'output local dictionary that stores AIST++ segmentation masks.') 37 | flags.DEFINE_string( 38 | 'output_dir', 39 | '/home/ruilongli/data/AIST++_dyn', 40 | 'output local dictionary that stores AIST images.') 41 | 42 | 43 | def main(_): 44 | aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir) 45 | 46 | for seq_name in FLAGS.sequence_names: 47 | output_dir = os.path.join(FLAGS.output_dir, seq_name) 48 | 49 | # split images & masks 50 | for view in AISTDataset.VIEWS: 51 | video_name = AISTDataset.get_video_name(seq_name, view) 52 | logging.info("processing %s" % video_name) 53 | 54 | video_file = os.path.join(FLAGS.video_dir, video_name + ".mp4") 55 | image_dir = os.path.join(output_dir, "images", view) 56 | os.makedirs(image_dir, exist_ok=True) 57 | ffmpeg_video_to_images(video_file, image_dir, fps=60, ext=".jpg") 58 | 59 | video_file = os.path.join(FLAGS.video_alpha_dir, video_name + "_alpha1.mp4") 60 | image_dir = os.path.join(output_dir, "alpha1", view) 61 | os.makedirs(image_dir, exist_ok=True) 62 | ffmpeg_video_to_images(video_file, image_dir, fps=60, ext=".png") 63 | 64 | video_file = os.path.join(FLAGS.video_alpha_dir, video_name + "_alpha2.mp4") 65 | image_dir = os.path.join(output_dir, "alpha2", view) 66 | os.makedirs(image_dir, exist_ok=True) 67 | ffmpeg_video_to_images(video_file, image_dir, fps=60, ext=".png") 68 | 69 | # camera data 70 | env_name = aist_dataset.mapping_seq2env[seq_name] 71 | cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name) 72 | camera_data = cgroup.get_dicts() 73 | with open(os.path.join(output_dir, "camera.json"), "w") as fp: 74 | json.dump(camera_data, fp) 75 | 76 | # pose data 77 | pose_data = {} 78 | 79 | smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion( 80 | aist_dataset.motion_dir, seq_name) 81 | smpl_poses = torch.from_numpy(smpl_poses).float() 82 | smpl_scaling = torch.from_numpy(smpl_scaling).float() 83 | smpl_trans = torch.from_numpy(smpl_trans).float() 84 | 85 | smpl = SMPL(model_path=FLAGS.smpl_dir, gender='MALE', batch_size=1) 86 | with torch.no_grad(): 87 | rest_output, rest_transforms = smpl.forward( 88 | scaling=smpl_scaling.reshape(1, 1), 89 | ) 90 | pose_data["rest_joints"] = rest_output.joints.squeeze(0)[:24] 91 | pose_data["rest_verts"] = rest_output.vertices.squeeze(0) 92 | pose_data["rest_tfs"] = rest_transforms.squeeze(0) 93 | 94 | with torch.no_grad(): 95 | pose_output, pose_transforms = smpl.forward( 96 | global_orient=smpl_poses[:, 0:1], 97 | body_pose=smpl_poses[:, 1:], 98 | transl=smpl_trans, 99 | scaling=smpl_scaling.reshape(1, 1), 100 | ) 101 | pose_data["joints"] = pose_output.joints[:, :24] 102 | pose_data["verts"] = pose_output.vertices 103 | pose_data["tfs"] = pose_transforms 104 | pose_data["params"] = torch.cat( 105 | [smpl_poses, smpl_trans, smpl_scaling.expand(smpl_poses.shape[0], 1)], 106 | dim=-1 107 | ) 108 | for key, value in pose_data.items(): 109 | print (key, value.shape) 110 | 111 | torch.save(pose_data, os.path.join(output_dir, 
"pose_data.pt")) 112 | 113 | # post process alpha1 & alpha2 to trimap mask 114 | for view in AISTDataset.VIEWS: 115 | video_name = AISTDataset.get_video_name(seq_name, view) 116 | logging.info("processing %s" % video_name) 117 | image_dir = os.path.join(output_dir, "images", view) 118 | image_files = sorted(glob.glob(os.path.join(image_dir, "*.jpg"))) 119 | alpha1_dir = os.path.join(output_dir, "alpha1", view) 120 | alpha1_files = sorted(glob.glob(os.path.join(alpha1_dir, "*.png"))) 121 | alpha2_dir = os.path.join(output_dir, "alpha2", view) 122 | alpha2_files = sorted(glob.glob(os.path.join(alpha2_dir, "*.png"))) 123 | mask_dir = os.path.join(output_dir, "mask", view) 124 | os.makedirs(mask_dir, exist_ok=True) 125 | 126 | for image_file, alpha1_file, alpha2_file in zip( 127 | image_files, alpha1_files, alpha2_files 128 | ): 129 | image = imageio.imread(image_file) 130 | alpha1 = imageio.imread(alpha1_file) / 255.0 131 | alpha2 = imageio.imread(alpha2_file) / 255.0 132 | fg_mask = (alpha1 > 0.5) & (alpha2 > 0.5) 133 | bg_mask = (alpha1 < 0.5) & (alpha2 < 0.5) 134 | mask = np.zeros_like(image) 135 | mask[fg_mask] = 255 136 | mask[bg_mask] = 0 137 | mask[~ (fg_mask | bg_mask)] = 128 138 | imageio.imwrite( 139 | os.path.join(mask_dir, os.path.basename(alpha1_file)), mask) 140 | 141 | if __name__ == '__main__': 142 | app.run(main) 143 | -------------------------------------------------------------------------------- /demos/run_openpose_pipeline.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | SEQUENCE_NAME=gBR_sBM_cAll_d04_mBR0_ch01 4 | 5 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_openpose.py --sequence_names=${SEQUENCE_NAME} 6 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_preprocessing.py --sequence_names=${SEQUENCE_NAME} 7 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_estimate_keypoints.py --sequence_names=${SEQUENCE_NAME} 8 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_estimate_smpl.py --sequence_names=${SEQUENCE_NAME} 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_segmentation.py --sequence_names=${SEQUENCE_NAME} -------------------------------------------------------------------------------- /demos/run_vis.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Demo code for running visualizer.""" 16 | import os 17 | 18 | from absl import app 19 | from absl import flags 20 | from aist_plusplus.loader import AISTDataset 21 | from aist_plusplus.visualizer import plot_on_video 22 | from smplx import SMPL 23 | import torch 24 | 25 | FLAGS = flags.FLAGS 26 | flags.DEFINE_string( 27 | 'anno_dir', 28 | '/home/ruilongli/data/AIST++/', 29 | 'input local dictionary for AIST++ annotations.') 30 | flags.DEFINE_string( 31 | 'video_dir', 32 | '/home/ruilongli/data/AIST/videos/10M/', 33 | 'input local dictionary for AIST Dance Videos.') 34 | flags.DEFINE_string( 35 | 'smpl_dir', 36 | '/home/ruilongli/data/smpl_model/smpl', 37 | 'input local dictionary that stores SMPL data.') 38 | flags.DEFINE_string( 39 | 'video_name', 40 | 'gBR_sBM_c01_d04_mBR0_ch01', 41 | 'input video name to be visualized.') 42 | flags.DEFINE_string( 43 | 'save_dir', 44 | './', 45 | 'output local dictionary that stores AIST++ visualization.') 46 | flags.DEFINE_enum( 47 | 'mode', '2D', ['2D', '3D', 'SMPL', 'SMPLMesh'], 48 | 'visualize 3D or 2D keypoints, or SMPL joints on image plane.') 49 | 50 | 51 | def main(_): 52 | # Parsing data info. 53 | aist_dataset = AISTDataset(FLAGS.anno_dir) 54 | video_path = os.path.join(FLAGS.video_dir, f'{FLAGS.video_name}.mp4') 55 | seq_name, view = AISTDataset.get_seq_name(FLAGS.video_name) 56 | view_idx = AISTDataset.VIEWS.index(view) 57 | 58 | # Parsing keypoints. 59 | if FLAGS.mode == '2D': # raw keypoints detection results. 60 | keypoints2d, _, _ = AISTDataset.load_keypoint2d( 61 | aist_dataset.keypoint2d_dir, seq_name) 62 | keypoints2d = keypoints2d[view_idx, :, :, 0:2] 63 | 64 | elif FLAGS.mode == '3D': # 3D keypoints with temporal optimization. 65 | keypoints3d = AISTDataset.load_keypoint3d( 66 | aist_dataset.keypoint3d_dir, seq_name, use_optim=True) 67 | nframes, njoints, _ = keypoints3d.shape 68 | env_name = aist_dataset.mapping_seq2env[seq_name] 69 | cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name) 70 | keypoints2d = cgroup.project(keypoints3d) 71 | keypoints2d = keypoints2d.reshape(9, nframes, njoints, 2)[view_idx] 72 | 73 | elif FLAGS.mode == 'SMPL': # SMPL joints 74 | smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion( 75 | aist_dataset.motion_dir, seq_name) 76 | smpl = SMPL(model_path=FLAGS.smpl_dir, gender='MALE', batch_size=1) 77 | keypoints3d = smpl.forward( 78 | global_orient=torch.from_numpy(smpl_poses[:, 0:1]).float(), 79 | body_pose=torch.from_numpy(smpl_poses[:, 1:]).float(), 80 | transl=torch.from_numpy(smpl_trans).float(), 81 | scaling=torch.from_numpy(smpl_scaling.reshape(1, 1)).float(), 82 | ).joints.detach().numpy() 83 | 84 | nframes, njoints, _ = keypoints3d.shape 85 | env_name = aist_dataset.mapping_seq2env[seq_name] 86 | cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name) 87 | keypoints2d = cgroup.project(keypoints3d) 88 | keypoints2d = keypoints2d.reshape(9, nframes, njoints, 2)[view_idx] 89 | 90 | elif FLAGS.mode == 'SMPLMesh': # SMPL Mesh 91 | import trimesh # install by `pip install trimesh` 92 | import vedo # install by `pip install vedo` 93 | smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion( 94 | aist_dataset.motion_dir, seq_name) 95 | smpl = SMPL(model_path=FLAGS.smpl_dir, gender='MALE', batch_size=1) 96 | vertices = smpl.forward( 97 | global_orient=torch.from_numpy(smpl_poses[:, 0:1]).float(), 98 | body_pose=torch.from_numpy(smpl_poses[:, 1:]).float(), 99 | transl=torch.from_numpy(smpl_trans).float(), 100 | 
        scaling=torch.from_numpy(smpl_scaling.reshape(1, 1)).float(),
101 |     ).vertices.detach().numpy()[0]  # first frame
102 |     faces = smpl.faces
103 |     mesh = trimesh.Trimesh(vertices, faces)
104 |     mesh.visual.face_colors = [200, 200, 250, 100]
105 | 
106 |     keypoints3d = AISTDataset.load_keypoint3d(
107 |         aist_dataset.keypoint3d_dir, seq_name, use_optim=True)
108 |     pts = vedo.Points(keypoints3d[0], r=20)  # first frame
109 | 
110 |     vedo.show(mesh, pts, interactive=True)
111 |     exit()
112 | 
113 |   # Visualize.
114 |   os.makedirs(FLAGS.save_dir, exist_ok=True)
115 |   save_path = os.path.join(FLAGS.save_dir, f'{FLAGS.video_name}.mp4')
116 |   plot_on_video(keypoints2d, video_path, save_path, fps=60)
117 | 
118 | 
119 | if __name__ == '__main__':
120 |   app.run(main)
121 | 
122 | 
--------------------------------------------------------------------------------
/downloader.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 The Google AI Perception Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Download AIST++ videos from AIST Dance Video Database website.
16 | 
17 | Be aware: Before running this script to download the videos, you should have read
18 | the Terms of Use of the AIST Dance Video Database here:
19 | 
20 | https://aistdancedb.ongaaccel.jp/terms_of_use/
21 | """
22 | import argparse
23 | import multiprocessing
24 | import os
25 | import sys
26 | import urllib.request
27 | from functools import partial
28 | 
29 | SOURCE_URL = 'https://aistdancedb.ongaaccel.jp/v1.0.0/video/10M/'
30 | LIST_URL = 'https://storage.googleapis.com/aist_plusplus_public/20121228/video_list.txt'
31 | 
32 | def _download(video_url, download_folder):
33 |   save_path = os.path.join(download_folder, os.path.basename(video_url))
34 |   urllib.request.urlretrieve(video_url, save_path)
35 | 
36 | if __name__ == '__main__':
37 |   parser = argparse.ArgumentParser(
38 |       description='Scripts for downloading AIST++ videos.')
39 |   parser.add_argument(
40 |       '--download_folder',
41 |       type=str,
42 |       required=True,
43 |       help='where to store AIST++ videos.')
44 |   parser.add_argument(
45 |       '--num_processes',
46 |       type=int,
47 |       default=1,
48 |       help='number of processes for multiprocessing.')
49 |   args = parser.parse_args()
50 | 
51 |   ans = input(
52 |       "Before running this script, please make sure you have read the Terms of Use "
53 |       "of the AIST Dance Video Database here: \n"
54 |       "\n"
55 |       "https://aistdancedb.ongaaccel.jp/terms_of_use/\n"
56 |       "\n"
57 |       "Do you agree with the Terms of Use? [Y/N]"
58 |   )
59 |   if ans in ["Yes", "YES", "yes", "Y", "y"]:
60 |     pass
61 |   else:
62 |     print("Program exits.
Please first acknowledge the Terms of Use.")
63 |     exit()
64 | 
65 |   os.makedirs(args.download_folder, exist_ok=True)
66 | 
67 |   seq_names = urllib.request.urlopen(LIST_URL)
68 |   seq_names = [seq_name.strip().decode('utf-8') for seq_name in seq_names]
69 |   video_urls = [
70 |       os.path.join(SOURCE_URL, seq_name + '.mp4') for seq_name in seq_names]
71 | 
72 |   download_func = partial(_download, download_folder=args.download_folder)
73 |   pool = multiprocessing.Pool(processes=args.num_processes)
74 |   for i, _ in enumerate(pool.imap_unordered(download_func, video_urls)):
75 |     sys.stderr.write('\rdownloading %d / %d' % (i + 1, len(video_urls)))
76 |   sys.stderr.write('\ndone.\n')
77 | 
--------------------------------------------------------------------------------
/processing/requirements.txt:
--------------------------------------------------------------------------------
1 | vedo>=2020.4.2
2 | scipy>=1.3.1
--------------------------------------------------------------------------------
/processing/run_estimate_camera.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 The Google AI Perception Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Estimate AIST++ camera parameters."""
16 | import json
17 | import math
18 | import os
19 | import random
20 | 
21 | from absl import app
22 | from absl import flags
23 | from aist_plusplus.loader import AISTDataset
24 | import aniposelib
25 | import numpy as np
26 | import vedo
27 | import cv2
28 | from scipy.spatial.transform import Rotation as R
29 | 
30 | FLAGS = flags.FLAGS
31 | flags.DEFINE_string(
32 |     'anno_dir',
33 |     '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/',
34 |     'input local directory for AIST++ annotations.')
35 | flags.DEFINE_string(
36 |     'save_dir',
37 |     '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/cameras/',
38 |     'output local directory that stores AIST++ camera parameters.')
39 | flags.DEFINE_bool(
40 |     'visualize', False,
41 |     'Whether to visualize the cameras for debugging.')
42 | random.seed(0)
43 | np.random.seed(0)
44 | 
45 | 
46 | def plot_cameras(cgroup):
47 |   points_world = np.array([
48 |       [40., 0., 0.],  # arrow x: red
49 |       [0., 40., 0.],  # arrow y: green
50 |       [0., 0., 40.],  # arrow z: blue
51 |   ])
52 |   colors = ['r', 'g', 'b']
53 |   axes_all = [
54 |       vedo.Arrows([[0, 0, 0]], [points_world[i]]).c(colors[i])
55 |       for i in range(3)]
56 |   for camera in cgroup.cameras:
57 |     rot_mat = cv2.Rodrigues(camera.rvec)[0]
58 |     cam_center = - np.linalg.inv(rot_mat).dot(camera.tvec)
59 |     points_cam = np.einsum('ij,kj->ki', np.linalg.inv(rot_mat), points_world)
60 |     axes_all += [
61 |         vedo.Arrows([cam_center], [cam_center + points_cam[i]]).c(colors[i])
62 |         for i in range(3)]
63 |     axes_all += [vedo.Text(camera.name, cam_center, s=10)]
64 |   return axes_all
65 | 
66 | 
67 | def init_env_cameras():
68 |   """Manually initializes a rough estimate of the environment cameras."""
69 |   cams = []
70 |   for i, view in
enumerate(AISTDataset.VIEWS): 71 | f = 1600 72 | cx = 1920 // 2 73 | cy = 1080 // 2 74 | if view == 'c09': 75 | r1 = R.from_euler('y', 180, degrees=True) 76 | r2 = R.from_euler('z', 180, degrees=True) 77 | rvec = (r1 * r2).as_rotvec() 78 | tvec = [0, 170, 500] 79 | else: 80 | r1 = R.from_euler('y', 180 - 360 // 8 * i, degrees=True) 81 | r2 = R.from_euler('z', 180, degrees=True) 82 | rvec = (r1 * r2).as_rotvec() 83 | tvec = [0, 180, 500] 84 | 85 | matrix = np.array([ 86 | [f, 0, cx], 87 | [0, f, cy], 88 | [0, 0, 1], 89 | ], dtype=np.float32) 90 | cams.append( 91 | aniposelib.cameras.Camera( 92 | matrix=matrix, rvec=rvec, tvec=tvec, name=view, size=(1920, 1080))) 93 | cgroup = aniposelib.cameras.CameraGroup(cams) 94 | return cgroup 95 | 96 | 97 | def main(_): 98 | aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir) 99 | 100 | for env_name, seq_names in aist_dataset.mapping_env2seq.items(): 101 | # Init camera parameters 102 | cgroup = init_env_cameras() 103 | 104 | # Select a set of sequences for optimizing camera parameters. 105 | seq_names = random.choices(seq_names, k=20) 106 | 107 | # Load 2D keypoints 108 | keypoints2d_all = [] 109 | for seq_name in seq_names: 110 | keypoints2d_raw, _, _ = AISTDataset.load_keypoint2d( 111 | aist_dataset.keypoint2d_dir, seq_name=seq_name) 112 | # Special cases 113 | if seq_name == 'gBR_sBM_cAll_d04_mBR0_ch01': 114 | keypoints2d_raw[4] = np.nan # not synced view 115 | if seq_name == 'gJB_sBM_cAll_d07_mJB3_ch05': 116 | keypoints2d_raw[6] = np.nan # size 640x480 117 | keypoints2d_all.append(keypoints2d_raw) 118 | keypoints2d_all = np.concatenate(keypoints2d_all, axis=1) 119 | 120 | # Filter keypoints to select those best points 121 | kpt_thre = 0.5 122 | ignore_idxs = np.where(keypoints2d_all[:, :, :, 2] < kpt_thre) 123 | keypoints2d_all[ignore_idxs[0], ignore_idxs[1], ignore_idxs[2], :] = np.nan 124 | keypoints2d_all = keypoints2d_all[..., 0:2] 125 | 126 | # Apply bundle adjustment and dump the camera parameters 127 | nviews = keypoints2d_all.shape[0] 128 | cgroup.bundle_adjust_iter( 129 | keypoints2d_all.reshape(nviews, -1, 2), 130 | n_iters=20, 131 | n_samp_iter=500, 132 | n_samp_full=5000, 133 | verbose=True) 134 | os.makedirs(FLAGS.save_dir, exist_ok=True) 135 | camera_file = os.path.join(FLAGS.save_dir, f'{env_name}.json') 136 | with open(camera_file, 'w') as f: 137 | json.dump([camera.get_dict() for camera in cgroup.cameras], f) 138 | 139 | # visualize the world with one frame 140 | if FLAGS.visualize: 141 | print("seq_name:", seq_name) 142 | axes_all = plot_cameras(cgroup) 143 | keypoints3d = cgroup.triangulate( 144 | keypoints2d_all[:, 0].reshape(nviews, -1, 2) 145 | ).reshape(-1, 3) 146 | vedo.show( 147 | *axes_all, vedo.Points(keypoints3d, r=12), 148 | interactive=True, axes=True) 149 | vedo.clear() 150 | 151 | 152 | if __name__ == '__main__': 153 | app.run(main) 154 | -------------------------------------------------------------------------------- /processing/run_estimate_keypoints.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Estimate AIST++ 3D keypoints.""" 16 | import os 17 | import pickle 18 | 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | from aist_plusplus.loader import AISTDataset 23 | import numpy as np 24 | 25 | FLAGS = flags.FLAGS 26 | 27 | flags.DEFINE_list( 28 | 'sequence_names', 29 | None, 30 | 'list of sequence names to be processed. None means to process all.') 31 | flags.DEFINE_string( 32 | 'anno_dir', 33 | '/home/ruilongli/data/AIST++_openpose/', 34 | 'input local dictionary for AIST++ annotations.') 35 | flags.DEFINE_string( 36 | 'save_dir', 37 | '/home/ruilongli/data/AIST++_openpose/keypoints3d/', 38 | 'output local dictionary that stores AIST++ 3D keypoints.') 39 | flags.DEFINE_enum( 40 | 'data_type', 41 | 'openpose', 42 | ['internal', 'openpose'], 43 | 'Which openpose detector is being used.' 44 | ) 45 | 46 | np.random.seed(0) 47 | 48 | 49 | def main(_): 50 | aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir) 51 | 52 | if FLAGS.sequence_names: 53 | seq_names = FLAGS.sequence_names 54 | else: 55 | seq_names = aist_dataset.mapping_seq2env.keys() 56 | 57 | for seq_name in seq_names: 58 | logging.info('processing %s', seq_name) 59 | env_name = aist_dataset.mapping_seq2env[seq_name] 60 | 61 | # Load camera parameters 62 | cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name) 63 | 64 | # load 2D keypoints 65 | keypoints2d, det_scores, _ = AISTDataset.load_keypoint2d( 66 | aist_dataset.keypoint2d_dir, seq_name=seq_name) 67 | nviews, nframes, _, _ = keypoints2d.shape 68 | assert det_scores.shape[0] == nviews 69 | assert det_scores.shape[1] == nframes 70 | if seq_name == 'gBR_sBM_cAll_d04_mBR0_ch01': 71 | keypoints2d[4] = np.nan # not synced view 72 | if seq_name == 'gJB_sBM_cAll_d07_mJB3_ch05': 73 | keypoints2d[6] = np.nan # size 640x480 74 | 75 | # filter keypoints to select those best points 76 | kpt_thre = 0.15 77 | ignore_idxs = np.where(keypoints2d[:, :, :, 2] < kpt_thre) 78 | keypoints2d[ignore_idxs[0], ignore_idxs[1], ignore_idxs[2], :] = np.nan 79 | det_thre = 0.0 80 | ignore_idxs = np.where(det_scores < det_thre) 81 | keypoints2d[ignore_idxs[0], ignore_idxs[1], :, :] = np.nan 82 | keypoints2d = keypoints2d[:, :, :, 0:2] 83 | 84 | # 3D pose triangulation and temporal optimization. 
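    # Note: the `bones` lists below are joint-index pairs describing skeleton
    # edges. They are passed as `constraints` to `triangulate_optim`, which
    # (in this aniposelib fork) presumably penalizes bone-length variation
    # across frames in addition to the reprojection error.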
85 | if FLAGS.data_type == "internal": 86 | # COCO-format bone constrains 87 | bones = [ 88 | (5, 7), (7, 9), (6, 8), (8, 10), (11, 13), (13, 15), (12, 14), 89 | (14, 16), (0, 1), (0, 2), (1, 2), (0, 3), (0, 4), (3, 4), 90 | ] 91 | elif FLAGS.data_type == "openpose": 92 | # https://cmu-perceptual-computing-lab.github.io/openpose/web/html/doc/md_doc_02_output.html 93 | body_bones = np.array([ 94 | (0, 15), (0, 16), (15, 17), (16, 18), 95 | (0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), (1, 8), 96 | (8, 9), (9, 10), (10, 11), (11, 24), (11, 22), (11, 23), (22, 23), (23, 24), (24, 22), 97 | (8, 12), (12, 13), (13, 14), (14, 21), (14, 19), (14, 20), (19, 20), (20, 21), (21, 19) 98 | ]) 99 | bones = body_bones.tolist() 100 | # hand_bones = np.array([ 101 | # (0, 1), (1, 2), (2, 3), (3, 4), 102 | # (0, 5), (5, 6), (6, 7), (7, 8), 103 | # (0, 9), (9, 10), (10, 11), (11, 12), 104 | # (0, 13), (13, 14), (14, 15), (15, 16), 105 | # (0, 17), (17, 18), (18, 19), (19, 20) 106 | # ]) 107 | # bones = np.concatenate([ 108 | # body_bones, hand_bones + 25, hand_bones + 25 + 21]).tolist() 109 | else: 110 | raise ValueError(FLAGS.data_type) 111 | keypoints3d = cgroup.triangulate( 112 | keypoints2d.reshape(nviews, -1, 2) 113 | ).reshape(nframes, -1, 3) 114 | keypoints3d_optim = cgroup.triangulate_optim( 115 | keypoints2d, constraints=bones, verbose=True 116 | ).reshape(nframes, -1, 3) 117 | 118 | # Save to pkl 119 | os.makedirs(FLAGS.save_dir, exist_ok=True) 120 | keypoints_file = os.path.join(FLAGS.save_dir, f'{seq_name}.pkl') 121 | with open(keypoints_file, 'wb') as f: 122 | pickle.dump({ 123 | 'keypoints3d': keypoints3d, 124 | 'keypoints3d_optim': keypoints3d_optim, 125 | }, f, protocol=pickle.HIGHEST_PROTOCOL) 126 | 127 | 128 | if __name__ == '__main__': 129 | app.run(main) 130 | -------------------------------------------------------------------------------- /processing/run_estimate_smpl.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Estimate AIST++ SMPL-format Motion.""" 16 | import os 17 | import pickle 18 | 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | from aist_plusplus.loader import AISTDataset 23 | from aist_plusplus.utils import unify_joint_mappings 24 | import numpy as np 25 | from smplx import SMPL 26 | import torch 27 | 28 | try: 29 | import vedo, trimesh 30 | SUPPORT_VIS = True 31 | except: 32 | SUPPORT_VIS = False 33 | 34 | FLAGS = flags.FLAGS 35 | flags.DEFINE_list( 36 | 'sequence_names', 37 | None, 38 | 'list of sequence names to be processed. 
None means to process all.')
39 | flags.DEFINE_string(
40 |     'anno_dir',
41 |     '/home/ruilongli/data/AIST++_openpose/',
42 |     'input local directory for AIST++ annotations.')
43 | flags.DEFINE_string(
44 |     'smpl_dir',
45 |     '/home/ruilongli/data/smpl_model/smpl/',
46 |     'input local directory that stores SMPL data.')
47 | flags.DEFINE_string(
48 |     'save_dir',
49 |     '/home/ruilongli/data/AIST++_openpose/motions/',
50 |     'output local directory that stores AIST++ SMPL-format motion data.')
51 | flags.DEFINE_bool(
52 |     'visualize',
53 |     False,
54 |     'Whether to visualize the fitting process.')
55 | flags.DEFINE_enum(
56 |     'data_type',
57 |     'openpose',
58 |     ['internal', 'openpose'],
59 |     'Which openpose detector is being used.')
60 | np.random.seed(0)
61 | torch.manual_seed(0)
62 | 
63 | 
64 | class SMPLRegressor:
65 |   """SMPL fitting based on 3D keypoints."""
66 | 
67 |   def __init__(self, smpl_model_path, smpl_model_gender='MALE'):
68 |     # Fitting hyper-parameters
69 |     self.base_lr = 100.0
70 |     self.niter = 10000
71 |     self.metric = torch.nn.MSELoss()
72 |     self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
73 |     self.smpl_model_path = smpl_model_path
74 |     self.smpl_model_gender = smpl_model_gender
75 | 
76 |     # Mapping to unify joint definitions
77 |     self.joints_mapping_smpl = unify_joint_mappings(dataset='smpl')
78 | 
79 |   def get_optimizer(self, smpl, step, base_lr):
80 |     """Sets up the optimizer with a warm-up learning rate schedule."""
81 |     if step < 100:
82 |       optimizer = torch.optim.SGD([
83 |           {'params': [smpl.transl], 'lr': base_lr},
84 |           {'params': [smpl.scaling], 'lr': base_lr * 0.01},
85 |           {'params': [smpl.global_orient], 'lr': 0.0},
86 |           {'params': [smpl.body_pose], 'lr': 0.0},
87 |           {'params': [smpl.betas], 'lr': 0.0},
88 |       ])
89 | 
90 |     elif step < 400:
91 |       optimizer = torch.optim.SGD([
92 |           {'params': [smpl.transl], 'lr': base_lr},
93 |           {'params': [smpl.scaling], 'lr': base_lr * 0.01},
94 |           {'params': [smpl.global_orient], 'lr': base_lr * 0.001},
95 |           {'params': [smpl.body_pose], 'lr': 0.0},
96 |           {'params': [smpl.betas], 'lr': 0.0},
97 |       ])
98 | 
99 |     else:
100 |       optimizer = torch.optim.SGD([
101 |           {'params': [smpl.transl], 'lr': base_lr},
102 |           {'params': [smpl.scaling], 'lr': base_lr * 0.01},
103 |           {'params': [smpl.global_orient], 'lr': base_lr * 0.001},
104 |           {'params': [smpl.body_pose], 'lr': base_lr * 0.001},
105 |           {'params': [smpl.betas], 'lr': 0.0},
106 |       ])
107 |     return optimizer
108 | 
109 |   def fit(self, keypoints3d, dtype='coco', verbose=True):
110 |     """Run fitting to optimize the SMPL parameters."""
111 |     assert len(keypoints3d.shape) == 3, 'input shape should be [N, njoints, 3]'
112 |     mapping_target = unify_joint_mappings(dataset=dtype)
113 |     keypoints3d = keypoints3d[:, mapping_target, :]
114 |     keypoints3d = torch.from_numpy(keypoints3d).float().to(self.device)
115 |     batch_size, njoints = keypoints3d.shape[0:2]
116 | 
117 |     # Init learnable smpl model
118 |     smpl = SMPL(
119 |         model_path=self.smpl_model_path,
120 |         gender=self.smpl_model_gender,
121 |         batch_size=batch_size).to(self.device)
122 | 
123 |     # Start fitting
124 |     for step in range(self.niter):
125 |       optimizer = self.get_optimizer(smpl, step, self.base_lr)
126 | 
127 |       output = smpl.forward()
128 |       joints = output.joints[:, self.joints_mapping_smpl[:njoints], :]
129 |       loss = self.metric(joints, keypoints3d)
130 | 
131 |       optimizer.zero_grad()
132 |       loss.backward()
133 |       optimizer.step()
134 | 
135 |       if verbose and step % 10 == 0:
136 |         logging.info(f'step {step:03d}; loss {loss.item():.3f};')
137 | 
138 |       if FLAGS.visualize:
139 |         vertices
= output.vertices[0].detach().cpu().numpy() # first frame 140 | mesh = trimesh.Trimesh(vertices, smpl.faces) 141 | mesh.visual.face_colors = [200, 200, 250, 100] 142 | pts = vedo.Points(keypoints3d[0].detach().cpu().numpy(), r=20) # first frame 143 | vedo.show(mesh, pts, interactive=False) 144 | 145 | # Return results 146 | return smpl, loss.item() 147 | 148 | 149 | def main(_): 150 | if FLAGS.visualize: 151 | assert SUPPORT_VIS, "--visualize is not support! Fail to import vedo or trimesh." 152 | 153 | aist_dataset = AISTDataset(FLAGS.anno_dir) 154 | smpl_regressor = SMPLRegressor(FLAGS.smpl_dir, 'MALE') 155 | 156 | if FLAGS.sequence_names: 157 | seq_names = FLAGS.sequence_names 158 | else: 159 | seq_names = aist_dataset.mapping_seq2env.keys() 160 | 161 | for seq_name in seq_names: 162 | logging.info('processing %s', seq_name) 163 | 164 | # load 3D keypoints 165 | keypoints3d = AISTDataset.load_keypoint3d( 166 | aist_dataset.keypoint3d_dir, seq_name, use_optim=True) 167 | 168 | # SMPL fitting 169 | if FLAGS.data_type == "internal": 170 | smpl, loss = smpl_regressor.fit(keypoints3d, dtype='coco', verbose=True) 171 | elif FLAGS.data_type == "openpose": 172 | smpl, loss = smpl_regressor.fit(keypoints3d, dtype='openpose25', verbose=True) 173 | else: 174 | raise ValueError(FLAGS.data_type) 175 | 176 | # One last time forward 177 | with torch.no_grad(): 178 | _ = smpl.forward() 179 | body_pose = smpl.body_pose.detach().cpu().numpy() 180 | global_orient = smpl.global_orient.detach().cpu().numpy() 181 | smpl_poses = np.concatenate([global_orient, body_pose], axis=1) 182 | smpl_scaling = smpl.scaling.detach().cpu().numpy() 183 | smpl_trans = smpl.transl.detach().cpu().numpy() 184 | 185 | os.makedirs(FLAGS.save_dir, exist_ok=True) 186 | motion_file = os.path.join(FLAGS.save_dir, f'{seq_name}.pkl') 187 | with open(motion_file, 'wb') as f: 188 | pickle.dump({ 189 | 'smpl_poses': smpl_poses, 190 | 'smpl_scaling': smpl_scaling, 191 | 'smpl_trans': smpl_trans, 192 | 'smpl_loss': loss, 193 | }, f, protocol=pickle.HIGHEST_PROTOCOL) 194 | 195 | 196 | if __name__ == '__main__': 197 | app.run(main) 198 | -------------------------------------------------------------------------------- /processing/run_openpose.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Detect frame-by-frame 2D keypoints using openpose.""" 16 | import os 17 | import logging 18 | 19 | from absl import app 20 | from absl import flags 21 | from aist_plusplus.loader import AISTDataset 22 | from aist_plusplus.utils import ffmpeg_video_to_images 23 | 24 | FLAGS = flags.FLAGS 25 | flags.DEFINE_list( 26 | 'sequence_names', 27 | None, 28 | 'list of sequence names to be processed. 
None means to process all.') 29 | flags.DEFINE_string( 30 | 'anno_dir', 31 | '/home/ruilongli/data/AIST++_openpose/', 32 | 'input local dictionary for AIST++ annotations.') 33 | flags.DEFINE_string( 34 | 'openpose_dir', 35 | '/home/ruilongli/workspace/openpose', 36 | 'input openpose repo that contains the executable.') 37 | flags.DEFINE_string( 38 | 'video_dir', 39 | '/home/ruilongli/data/AIST/videos/10M/', 40 | 'input local dictionary for AIST Dance Videos.') 41 | flags.DEFINE_string( 42 | 'image_save_dir', 43 | '/home/ruilongli/data/AIST/images/10M/', 44 | 'output local dictionary that stores AIST images.') 45 | flags.DEFINE_string( 46 | 'openpose_save_dir', 47 | '/home/ruilongli/data/AIST++_openpose/openpose', 48 | 'output local dictionary that stores AIST++ openpose results.') 49 | 50 | 51 | def main(_): 52 | os.makedirs(FLAGS.image_save_dir, exist_ok=True) 53 | os.makedirs(FLAGS.openpose_save_dir, exist_ok=True) 54 | 55 | if FLAGS.sequence_names: 56 | seq_names = FLAGS.sequence_names 57 | else: 58 | aist_dataset = AISTDataset(FLAGS.anno_dir) 59 | seq_names = aist_dataset.mapping_seq2env.keys() 60 | 61 | for seq_name in seq_names: 62 | for view in AISTDataset.VIEWS: 63 | video_name = AISTDataset.get_video_name(seq_name, view) 64 | video_file = os.path.join(FLAGS.video_dir, video_name + ".mp4") 65 | if not os.path.exists(video_file): 66 | continue 67 | logging.info('processing %s', video_file) 68 | 69 | # extract images 70 | image_dir = os.path.join(FLAGS.image_save_dir, video_name) 71 | ffmpeg_video_to_images(video_file, image_dir, fps=60) 72 | 73 | # extract keypoints 74 | save_dir = os.path.join(FLAGS.openpose_save_dir, video_name) 75 | os.system( 76 | "cd %s; " % FLAGS.openpose_dir + 77 | "./build/examples/openpose/openpose.bin " + 78 | "--image_dir %s " % image_dir + 79 | "--write_json %s " % save_dir + 80 | "--display 0 --hand --face --render_pose 0" 81 | ) 82 | 83 | if __name__ == '__main__': 84 | app.run(main) 85 | -------------------------------------------------------------------------------- /processing/run_preprocessing.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Process frame-by-frame keypoints detection results to pkl.""" 16 | import glob 17 | import json 18 | import multiprocessing 19 | import os 20 | import pickle 21 | 22 | from absl import app 23 | from absl import flags 24 | from absl import logging 25 | from aist_plusplus.loader import AISTDataset 26 | import numpy as np 27 | 28 | FLAGS = flags.FLAGS 29 | 30 | flags.DEFINE_list( 31 | 'sequence_names', 32 | None, 33 | 'list of sequence names to be processed. None means to process all.') 34 | flags.DEFINE_string( 35 | 'keypoints_dir', 36 | '/home/ruilongli/data/AIST++_openpose/openpose/', 37 | 'input local dictionary that stores 2D keypoints detection results in json.' 
38 | )
39 | flags.DEFINE_string(
40 |     'save_dir',
41 |     '/home/ruilongli/data/AIST++_openpose/keypoints2d/',
42 |     'output local directory that stores 2D keypoints detection results in pkl.'
43 | )
44 | flags.DEFINE_enum(
45 |     'data_type',
46 |     'openpose',
47 |     ['internal', 'openpose'],
48 |     'Which openpose detector is being used.'
49 | )
50 | 
51 | 
52 | def array_nan(shape, dtype=np.float32):
53 |   array = np.empty(shape, dtype=dtype)
54 |   array[:] = np.nan
55 |   return array
56 | 
57 | 
58 | def load_keypoints2d_file(file_path):
59 |   """Load 2D keypoints from a single keypoint detection result file.
60 | 
61 |   Only one person is extracted from the results. If there are multiple
62 |   persons in the prediction results, we select the one with the highest
63 |   detection score. The number of joints is inferred from `FLAGS.data_type`
64 |   (17 for `internal`, 25 for `openpose`).
65 | 
66 |   Args:
67 |     file_path: the json file path.
68 | 
69 |   Returns:
70 |     A `np.array` with the shape of [njoints, 3], and a detection score.
71 |   """
72 |   if FLAGS.data_type == "internal":
73 |     njoints = 17
74 |   elif FLAGS.data_type == "openpose":
75 |     njoints = 25
76 |   else:
77 |     raise ValueError(FLAGS.data_type)
78 | 
79 |   keypoint = array_nan((njoints, 3), dtype=np.float32)
80 |   det_score = 0.0
81 | 
82 |   try:
83 |     with open(file_path, 'r') as f:
84 |       data = json.load(f)
85 |   except Exception as e:  # pylint: disable=broad-except
86 |     logging.warning(e)
87 |     return keypoint, det_score
88 | 
89 |   if FLAGS.data_type == "internal":
90 |     keypoints = np.array(data['keypoints']).reshape((-1, njoints, 3))
91 |     det_scores = np.array(data['detection_scores'])
92 |   elif FLAGS.data_type == "openpose":
93 |     keypoints = []
94 |     for person in data["people"]:
95 |       # npoints: 25, 70, 21, 21
96 |       # for key in ["pose", "face", "hand_left", "hand_right"]:
97 |       for key in ["pose"]:
98 |         keypoints.extend(person["%s_keypoints_2d" % key])
99 |     keypoints = np.array(keypoints).reshape(len(data["people"]), -1, 3)
100 |     assert keypoints.shape[1] == njoints, (
101 |         "The shape is not right. %s vs. %d" % (str(keypoints.shape), njoints)
102 |     )
103 |     det_scores = np.mean(keypoints[:, 0:25, -1], axis=-1)
104 |   else:
105 |     raise ValueError(FLAGS.data_type)
106 | 
107 |   # The detection results may contain zero person or multiple people.
108 |   if det_scores.shape[0] == 0:
109 |     # There is no person in this image. We set this frame to NaN.
110 |     return keypoint, det_score
111 |   else:
112 |     # There is at least one person in this image. We select the one with
113 |     # the highest detection score.
114 |     idx = np.argmax(det_scores)
115 |     keypoint = keypoints[idx]
116 |     det_score = det_scores[idx]
117 |     return keypoint, det_score
118 | 
119 | 
120 | def load_keypoints2d(data_dir, seq_name):
121 |   """Load 2D keypoints predictions for a set of multi-view videos."""
122 |   # Parsing sequence name to multi-view video names
123 |   video_names = [AISTDataset.get_video_name(seq_name, view)
124 |                  for view in AISTDataset.VIEWS]
125 | 
126 |   # In case frames are missing, we first scan all views to get a union
127 |   # of timestamps.
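  # (Timestamps are parsed from the per-frame json filenames, which differ by
  # detector: '{video_name}_{ts}.json' for `internal` and
  # '{ts:08d}_keypoints.json' for `openpose`; see the path templates below.)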
128 | paths_cache = {} 129 | timestamps = [] 130 | for video_name in video_names: 131 | paths = sorted(glob.glob(os.path.join(data_dir, video_name, '*.json'))) 132 | paths_cache[video_name] = paths 133 | if FLAGS.data_type == "internal": 134 | timestamps += [ 135 | int(os.path.basename(p).split('.')[0].split('_')[-1]) for p in paths] 136 | elif FLAGS.data_type == "openpose": 137 | timestamps += [ 138 | int(os.path.basename(p).split('.')[0].split('_')[0]) for p in paths] 139 | else: 140 | raise ValueError(FLAGS.data_type) 141 | timestamps = np.array(sorted(list(set(timestamps)))) # (N,) 142 | 143 | # Then we load all frames according to timestamps. 144 | keypoints2d = [] 145 | det_scores = [] 146 | for video_name in video_names: 147 | if FLAGS.data_type == "internal": 148 | paths = [ 149 | os.path.join(data_dir, video_name, f'{video_name}_{ts}.json') 150 | for ts in timestamps 151 | ] 152 | elif FLAGS.data_type == "openpose": 153 | paths = [ 154 | os.path.join(data_dir, video_name, f'{ts:08d}_keypoints.json') 155 | for ts in timestamps 156 | ] 157 | else: 158 | raise ValueError(FLAGS.data_type) 159 | keypoints2d_per_view = [] 160 | det_scores_per_view = [] 161 | for path in paths: 162 | keypoint, det_score = load_keypoints2d_file(path) 163 | keypoints2d_per_view.append(keypoint) 164 | det_scores_per_view.append(det_score) 165 | keypoints2d.append(keypoints2d_per_view) 166 | det_scores.append(det_scores_per_view) 167 | 168 | keypoints2d = np.array( 169 | keypoints2d, dtype=np.float32) # (nviews, N, njoints, 3) 170 | det_scores = np.array( 171 | det_scores, dtype=np.float32) # (nviews, N) 172 | return keypoints2d, det_scores, timestamps 173 | 174 | 175 | def process_and_save(seq_name): 176 | keypoints2d, det_scores, timestamps = load_keypoints2d( 177 | FLAGS.keypoints_dir, seq_name=seq_name) 178 | os.makedirs(FLAGS.save_dir, exist_ok=True) 179 | save_path = os.path.join(FLAGS.save_dir, f'{seq_name}.pkl') 180 | with open(save_path, 'wb') as f: 181 | pickle.dump({ 182 | 'keypoints2d': keypoints2d, 183 | 'det_scores': det_scores, 184 | 'timestamps': timestamps, 185 | }, f, protocol=pickle.HIGHEST_PROTOCOL) 186 | 187 | 188 | def main(_): 189 | if FLAGS.sequence_names: 190 | seq_names = FLAGS.sequence_names 191 | else: 192 | aist_dataset = AISTDataset(FLAGS.anno_dir) 193 | seq_names = aist_dataset.mapping_seq2env.keys() 194 | 195 | pool = multiprocessing.Pool(16) 196 | pool.map(process_and_save, seq_names) 197 | 198 | 199 | if __name__ == '__main__': 200 | app.run(main) 201 | 202 | -------------------------------------------------------------------------------- /processing/run_segmentation.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
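# Illustrative usage (mirrors demos/run_openpose_pipeline.sh; the default flag
# values below are machine-specific and usually need to be overridden):
#   python processing/run_segmentation.py --sequence_names=gBR_sBM_cAll_d04_mBR0_ch01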
15 | """Estimate foreground masks.""" 16 | import os 17 | 18 | from absl import app 19 | from absl import flags 20 | from absl import logging 21 | from aist_plusplus.loader import AISTDataset 22 | from aist_plusplus.utils import ffmpeg_video_read 23 | import numpy as np 24 | import torch 25 | import imageio 26 | import tqdm 27 | 28 | FLAGS = flags.FLAGS 29 | flags.DEFINE_list( 30 | 'sequence_names', 31 | None, 32 | 'list of sequence names to be processed. None means to process all.') 33 | flags.DEFINE_string( 34 | 'anno_dir', 35 | '/home/ruilongli/data/AIST++_openpose/', 36 | 'input local dictionary for AIST++ annotations.') 37 | flags.DEFINE_string( 38 | 'video_dir', 39 | '/home/ruilongli/data/AIST/videos/10M/', 40 | 'input local dictionary for AIST Dance Videos.') 41 | flags.DEFINE_string( 42 | 'save_dir', 43 | '/home/ruilongli/data/AIST++_openpose/segmentation/', 44 | 'output local dictionary that stores AIST++ segmentation masks.') 45 | np.random.seed(0) 46 | 47 | 48 | def estimate_background(input_video: str, alpha_video: str, output_image: str): 49 | video_reader = imageio.get_reader(input_video) 50 | alpha_reader = imageio.get_reader(alpha_video) 51 | background, weights = 0, 0 52 | for img, alpha in tqdm.tqdm(zip(video_reader, alpha_reader)): 53 | weight = (1 - np.float32(alpha) / 255.0) 54 | weights += weight 55 | background += np.float32(img) * weight 56 | background /= (weights + 1e-8) 57 | imageio.imwrite(output_image, np.uint8(background)) 58 | 59 | 60 | def main(_): 61 | # Here we use https://github.com/PeterL1n/RobustVideoMatting (GPL-3.0 License) 62 | # to get an initial alpha matting prediction. 63 | model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50").cuda() 64 | converter = torch.hub.load("PeterL1n/RobustVideoMatting", "converter") 65 | 66 | # Here we use https://github.com/PeterL1n/BackgroundMattingV2 (MIT License) 67 | # to get an accurate alpha matting. 68 | if not os.path.exists("/tmp/model.pth"): 69 | os.system("gdown https://drive.google.com/uc?id=1ErIAsB_miVhYL9GDlYUmfbqlV293mSYf -O /tmp/model.pth -q") 70 | if not os.path.exists("/tmp/BackgroundMattingV2"): 71 | os.system("cd /tmp/; git clone -q https://github.com/PeterL1n/BackgroundMattingV2") 72 | 73 | if FLAGS.sequence_names: 74 | seq_names = FLAGS.sequence_names 75 | else: 76 | aist_dataset = AISTDataset(FLAGS.anno_dir) 77 | seq_names = aist_dataset.mapping_seq2env.keys() 78 | 79 | os.makedirs(FLAGS.save_dir, exist_ok=True) 80 | for seq_name in seq_names: 81 | for view in AISTDataset.VIEWS: 82 | video_name = AISTDataset.get_video_name(seq_name, view) 83 | video_file = os.path.join(FLAGS.video_dir, video_name + ".mp4") 84 | if not os.path.exists(video_file): 85 | continue 86 | 87 | # step 1. initial alpha matting prediction (not accurate enough). 88 | logging.info('processing %s', video_file) 89 | alpha_file = os.path.join(FLAGS.save_dir, video_name + "_alpha1.mp4") 90 | if not os.path.exists(alpha_file): 91 | converter( 92 | model, # The loaded model, can be on any device (cpu or cuda). 93 | input_source=video_file, # A video file or an image sequence directory. 94 | downsample_ratio=None, # [Optional] If None, make downsampled max size be 512px. 95 | output_type='video', # Choose "video" or "png_sequence" 96 | output_alpha=alpha_file, # [Optional] Output the raw alpha prediction. 97 | output_video_mbps=4, # Output video mbps. Not needed for png sequence. 98 | seq_chunk=12, # Process n frames at once for better parallelism. 99 | num_workers=1, # Only for image sequence input. Reader threads. 
100 | progress=True # Print conversion progress. 101 | ) 102 | 103 | # step 2. estimate the background image from the inital alpha matting prediction. 104 | background_file = os.path.join(FLAGS.save_dir, video_name + "_bg.png") 105 | if not os.path.exists(background_file): 106 | estimate_background(video_file, alpha_file, background_file) 107 | 108 | # step 3. estimate the more accurate alpha matting. 109 | final_file = os.path.join(FLAGS.save_dir, video_name + "_alpha2") 110 | if not os.path.exists(final_file): 111 | os.system( 112 | "cd /tmp/BackgroundMattingV2/; " + 113 | "python inference_video.py " + 114 | "--model-type mattingrefine " + 115 | "--model-backbone resnet50 " + 116 | "--model-backbone-scale 0.25 " + 117 | "--model-refine-mode sampling " + 118 | "--model-refine-sample-pixels 80000 " + 119 | "--model-checkpoint '/tmp/model.pth' " + 120 | "--video-src '%s' " % video_file + 121 | "--video-bgr '%s' " % background_file + 122 | "--output-dir '%s' " % final_file + 123 | "--output-type pha" 124 | ) 125 | if os.path.exists(final_file): 126 | os.system("mv %s/pha.mp4 %s.mp4; rm -rf %s" % (final_file, final_file, final_file)) 127 | 128 | 129 | if __name__ == '__main__': 130 | app.run(main) 131 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.9.0 2 | numpy 3 | torch 4 | torchvision 5 | opencv-python 6 | git+https://github.com/liruilong940607/aniposelib 7 | git+https://github.com/liruilong940607/smplx 8 | ffmpeg-python 9 | imageio 10 | imageio-ffmpeg 11 | gdown -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import setuptools 16 | 17 | INSTALL_REQUIREMENTS = [ 18 | 'absl-py', 'numpy', 'opencv-python', 'ffmpeg-python'] 19 | 20 | setuptools.setup( 21 | name='aist_plusplus_api', 22 | url='https://github.com/google/aistplusplus_api', 23 | description='API for supporting AIST++ Dataset.', 24 | version='1.1.0', 25 | author='Ruilong Li', 26 | author_email='ruilongli94@gmail.com', 27 | packages=setuptools.find_packages(), 28 | install_requires=INSTALL_REQUIREMENTS 29 | ) 30 | --------------------------------------------------------------------------------
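A minimal end-to-end usage sketch (illustrative only; it assumes the package has been installed, e.g. with `pip install -e .` using the setup.py above, and that the annotation directory follows the layout the demos above expect):

    from aist_plusplus.loader import AISTDataset

    aist_dataset = AISTDataset('/path/to/aist_plusplus_final/')
    seq_name, view = AISTDataset.get_seq_name('gBR_sBM_c01_d04_mBR0_ch01')
    smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion(
        aist_dataset.motion_dir, seq_name)
    keypoints3d = AISTDataset.load_keypoint3d(
        aist_dataset.keypoint3d_dir, seq_name, use_optim=True)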