├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── aist_plusplus ├── __init__.py ├── features │ ├── __init__.py │ ├── kinetic.py │ ├── manual.py │ └── utils.py ├── loader.py ├── utils.py └── visualizer.py ├── assets └── aist_pipeline.jpg ├── demos ├── extract_motion_feats.py ├── run_dyn_processing.py ├── run_openpose_pipeline.sh └── run_vis.py ├── downloader.py ├── processing ├── requirements.txt ├── run_estimate_camera.py ├── run_estimate_keypoints.py ├── run_estimate_smpl.py ├── run_openpose.py ├── run_preprocessing.py └── run_segmentation.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | # lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | data/ 107 | 108 | .DS_Store -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution, 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. 
Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AIST++ API 2 | 3 | This repo contains starter code for using the AIST++ dataset. To download the 4 | dataset or explore details of this dataset, please go to our dataset [website](https://google.github.io/aistplusplus_dataset). 5 | 6 | ## Installation 7 | The code has been tested on `python>=3.7`. You can install the dependencies and this repo by: 8 | ``` bash 9 | pip install -r requirements.txt 10 | python setup.py install 11 | ``` 12 | You also need to make sure [ffmpeg](https://ffmpeg.org/download.html) is installed on your machine, if you would like to visualize the annotations using this api. 13 | 14 | ## How to use 15 | We provide demo code for loading and visualizing AIST++ annotations. 16 | Note [AIST++ annotations and 17 | videos](https://google.github.io/aistplusplus_dataset/download.html), 18 | as well as the [SMPL model](https://smpl.is.tue.mpg.de/en) (for SMPL visualization only) are required to run the demo code. 19 | 20 | The directory structure of the data is expected to be: 21 | ``` 22 | 23 | ├── motions/ 24 | ├── keypoints2d/ 25 | ├── keypoints3d/ 26 | ├── splits/ 27 | ├── cameras/ 28 | └── ignore_list.txt 29 | 30 | 31 | └── *.mp4 32 | 33 | 34 | ├── SMPL_MALE.pkl 35 | └── SMPL_FEMALE.pkl 36 | ``` 37 | 38 | #### Visualize 2D keypoints annotation 39 | The command below will plot 2D keypoints onto the raw video and save it to the 40 | directory `./visualization/`. 41 | ``` bash 42 | python demos/run_vis.py \ 43 | --anno_dir \ 44 | --video_dir \ 45 | --save_dir ./visualization/ \ 46 | --video_name gWA_sFM_c01_d27_mWA2_ch21 \ 47 | --mode 2D 48 | ``` 49 | 50 | #### Visualize 3D keypoints annotation 51 | The command below will project 3D keypoints onto the raw video using camera parameters, and save it to the 52 | directory `./visualization/`. 
53 | ``` bash 54 | python demos/run_vis.py \ 55 | --anno_dir \ 56 | --video_dir \ 57 | --save_dir ./visualization/ \ 58 | --video_name gWA_sFM_c01_d27_mWA2_ch21 \ 59 | --mode 3D 60 | ``` 61 | 62 | #### Visualize the SMPL joints annotation 63 | The command below will first calculate the SMPL joint locations from our motion 64 | annotations (joint rotations and root trajectories), then project them onto the 65 | raw video and plot. The result will be saved into the directory 66 | `./visualization/`. 67 | ``` bash 68 | python demos/run_vis.py \ 69 | --anno_dir \ 70 | --video_dir \ 71 | --smpl_dir \ 72 | --save_dir ./visualization/ \ 73 | --video_name gWA_sFM_c01_d27_mWA2_ch21 \ 74 | --mode SMPL 75 | ``` 76 | 77 | #### Visualize the SMPL Mesh 78 | The command below will calculate the first frame SMPL mesh from our motion 79 | annotations (joint rotations and root trajectories), and visualize in 3D. 80 | ``` bash 81 | # install some additional libraries for 3D mesh visualization 82 | pip install vedo trimesh 83 | 84 | python demos/run_vis.py \ 85 | --anno_dir \ 86 | --smpl_dir \ 87 | --video_name gWA_sFM_c01_d27_mWA2_ch21 \ 88 | --mode SMPLMesh 89 | ``` 90 | 91 | #### Extract SMPL motion features 92 | The command below will calculate and print two types of features for a motion sequence in SMPL format. We take reference from [fairmotion](https://github.com/facebookresearch/fairmotion/tree/master/fairmotion/tasks/clustering) to calculate the features. 93 | ``` bash 94 | python demos/extract_motion_feats.py \ 95 | --anno_dir \ 96 | --smpl_dir \ 97 | --video_name gWA_sFM_c01_d27_mWA2_ch21 98 | ``` 99 | 100 | #### Multi-view 3D keypoints and motion reconstruction 101 | 102 | This repo also provides code we used for constructing this dataset from the 103 | multi-view [AIST Dance Video Database](https://aistdancedb.ongaaccel.jp/). The 104 | construction pipeline starts with frame-by-frame 2D keypoint detection and 105 | manual camera estimation. Then triangulation and bundle adjustment are applied to optimize the 106 | camera parameters as well as the 3D keypoints. Finally we sequentially fit the SMPL model to 3D keypoints to get a motion sequence represented using joint angles and a root trajectory. The following figure shows our pipeline overview. 107 | 108 |
109 | ![AIST++ construction pipeline overview.](assets/aist_pipeline.jpg) 110 | 111 | 
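As a rough illustration of the triangulation step above, the sketch below reconstructs 3D keypoints for one frame from the multi-view 2D keypoints and camera parameters, using this repo's loaders. The annotation directory is a placeholder, and the call to `CameraGroup.triangulate` assumes aniposelib's triangulation API; the full, optimized pipeline lives in `processing/run_estimate_keypoints.py`.
``` python
from aist_plusplus.loader import AISTDataset

anno_dir = '<ANNOTATIONS_DIR>'  # placeholder: local AIST++ annotation folder
seq_name = 'gWA_sFM_cAll_d27_mWA2_ch21'  # sequence for the README's example video

aist_dataset = AISTDataset(anno_dir)

# Multi-view 2D keypoints: (n_views, n_frames, 17, 3), last dim is (x, y, score).
keypoints2d, _, _ = AISTDataset.load_keypoint2d(
    aist_dataset.keypoint2d_dir, seq_name)

# Camera group for the environment this sequence was recorded in.
env_name = aist_dataset.mapping_seq2env[seq_name]
cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name)

# Triangulate frame 0 across all views (assumes aniposelib's CameraGroup.triangulate).
points2d = keypoints2d[:, 0, :, :2]         # (n_views, 17, 2)
keypoints3d = cgroup.triangulate(points2d)  # (17, 3)
print(keypoints3d.shape)
```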
112 | 113 | The annotations in AIST++ are in [COCO-format](https://cocodataset.org/#home) for 2D \& 3D keypoints, and 114 | [SMPL-format](https://smpl.is.tue.mpg.de/) for human motion annotations. It is designed to serve general 115 | research purposes. However, in some cases you might need the data in different format 116 | (e.g., [Openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) / 117 | [Alphapose](https://github.com/MVIG-SJTU/AlphaPose) keypoints format, or [STAR](https://star.is.tue.mpg.de/) human motion 118 | format). **With the code we provide, it should be easy to construct your own 119 | version of AIST++, with your own keypoint detector or human model definition.** 120 | 121 | **Step 1.** Assume you have your own 2D keypoint detection results stored in ``, you can start by preprocessing the keypoints into the `.pkl` format that we support. The code we used at this step is as follows but you might need to modify the script `run_preprocessing.py` in order to be compatible with your own data. 122 | ``` bash 123 | python processing/run_preprocessing.py \ 124 | --keypoints_dir \ 125 | --save_dir /keypoints2d/ 126 | ``` 127 | 128 | **Step 2.** Then you can estimate the camera parameters using your 2D keypoints. This step 129 | is optional as you can still use our camera parameter estimates which are 130 | quite accurate. At this step, you will need the `/cameras/mapping.txt` file which stores the mapping from videos to different environment settings. 131 | ``` bash 132 | # install some additional libraries 133 | pip install -r processing/requirements.txt 134 | 135 | # If you would like to estimate your own camera parameters: 136 | python processing/run_estimate_camera.py \ 137 | --anno_dir \ 138 | --save_dir /cameras/ 139 | # Or you can skip this step by just using our camera parameter estimates. 140 | ``` 141 | 142 | **Step 3.** Next step is to perform 3D keypoints reconstruction from multi-view 2D keypoints 143 | and camera parameters. You can just run: 144 | ``` bash 145 | python processing/run_estimate_keypoints.py \ 146 | --anno_dir \ 147 | --save_dir /keypoints3d/ 148 | ``` 149 | 150 | **Step 4.** Finally we can estimate SMPL-format human motion data by fitting 151 | the 3D keypoints to the SMPL model. If you would like to use another human model such 152 | as [STAR](https://star.is.tue.mpg.de/), you will need to do some modifications in the script 153 | `run_estimate_smpl.py`. The following command runs SMPL fitting. 154 | ``` bash 155 | python processing/run_estimate_smpl.py \ 156 | --anno_dir \ 157 | --smpl_dir \ 158 | --save_dir /motions/ 159 | ``` 160 | Note that this step will take several days to process the entire dataset if your machine has only one GPU. 161 | In practise, we run this step on a cluster, but are only able to provide the single-threaded version. 162 | 163 | #### MISC. 
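- Loading annotations programmatically: a minimal sketch using this repo's `AISTDataset` loader (the annotation directory is a placeholder):
``` python
from aist_plusplus.loader import AISTDataset

aist_dataset = AISTDataset('<ANNOTATIONS_DIR>')  # placeholder: local AIST++ annotation folder

# Map an AIST video name to its AIST++ sequence name and camera view.
seq_name, view = AISTDataset.get_seq_name('gWA_sFM_c01_d27_mWA2_ch21')

# SMPL-format motion: joint rotations (N, 24, 3), scaling (1,), root translations (N, 3).
smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion(
    aist_dataset.motion_dir, seq_name)

# COCO-format 3D keypoints (N, 17, 3); use_optim=True returns the bundle-adjusted version.
keypoints3d = AISTDataset.load_keypoint3d(
    aist_dataset.keypoint3d_dir, seq_name, use_optim=True)
```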
164 | - COCO-format keypoint definition: 165 | ``` 166 | [ 167 | "nose", 168 | "left_eye", "right_eye", "left_ear", "right_ear", "left_shoulder","right_shoulder", 169 | "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hip", "right_hip", 170 | "left_knee", "right_knee", "left_ankle", "right_ankle" 171 | ] 172 | ``` 173 | 174 | - SMPL-format body joint definition: 175 | ``` 176 | [ 177 | "root", 178 | "lhip", "rhip", "belly", 179 | "lknee", "rknee", "spine", 180 | "lankle", "rankle", "chest", 181 | "ltoes", "rtoes", "neck", 182 | "linshoulder", "rinshoulder", 183 | "head", "lshoulder", "rshoulder", 184 | "lelbow", "relbow", 185 | "lwrist", "rwrist", 186 | "lhand", "rhand", 187 | ] 188 | ``` 189 | -------------------------------------------------------------------------------- /aist_plusplus/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/aistplusplus_api/2dd7b3e946b794fd0081c98e2e2433545abf8b87/aist_plusplus/__init__.py -------------------------------------------------------------------------------- /aist_plusplus/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/aistplusplus_api/2dd7b3e946b794fd0081c98e2e2433545abf8b87/aist_plusplus/features/__init__.py -------------------------------------------------------------------------------- /aist_plusplus/features/kinetic.py: -------------------------------------------------------------------------------- 1 | # BSD License 2 | 3 | # For fairmotion software 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. 6 | # Modified by Ruilong Li 7 | 8 | # Redistribution and use in source and binary forms, with or without modification, 9 | # are permitted provided that the following conditions are met: 10 | 11 | # * Redistributions of source code must retain the above copyright notice, this 12 | # list of conditions and the following disclaimer. 13 | 14 | # * Redistributions in binary form must reproduce the above copyright notice, 15 | # this list of conditions and the following disclaimer in the documentation 16 | # and/or other materials provided with the distribution. 17 | 18 | # * Neither the name Facebook nor the names of its contributors may be used to 19 | # endorse or promote products derived from this software without specific 20 | # prior written permission. 21 | 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 23 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 24 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 26 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 29 | # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | import numpy as np 33 | from . 
import utils as feat_utils 34 | 35 | 36 | def extract_kinetic_features(positions): 37 | assert len(positions.shape) == 3 # (seq_len, n_joints, 3) 38 | features = KineticFeatures(positions) 39 | kinetic_feature_vector = [] 40 | for i in range(positions.shape[1]): 41 | feature_vector = np.hstack( 42 | [ 43 | features.average_kinetic_energy_horizontal(i), 44 | features.average_kinetic_energy_vertical(i), 45 | features.average_energy_expenditure(i), 46 | ] 47 | ) 48 | kinetic_feature_vector.extend(feature_vector) 49 | kinetic_feature_vector = np.array(kinetic_feature_vector, dtype=np.float32) 50 | return kinetic_feature_vector 51 | 52 | 53 | class KineticFeatures: 54 | def __init__( 55 | self, positions, frame_time=1./60, up_vec="y", sliding_window=2 56 | ): 57 | self.positions = positions 58 | self.frame_time = frame_time 59 | self.up_vec = up_vec 60 | self.sliding_window = sliding_window 61 | 62 | def average_kinetic_energy(self, joint): 63 | average_kinetic_energy = 0 64 | for i in range(1, len(self.positions)): 65 | average_velocity = feat_utils.calc_average_velocity( 66 | self.positions, i, joint, self.sliding_window, self.frame_time 67 | ) 68 | average_kinetic_energy += average_velocity ** 2 69 | average_kinetic_energy = average_kinetic_energy / ( 70 | len(self.positions) - 1.0 71 | ) 72 | return average_kinetic_energy 73 | 74 | def average_kinetic_energy_horizontal(self, joint): 75 | val = 0 76 | for i in range(1, len(self.positions)): 77 | average_velocity = feat_utils.calc_average_velocity_horizontal( 78 | self.positions, 79 | i, 80 | joint, 81 | self.sliding_window, 82 | self.frame_time, 83 | self.up_vec, 84 | ) 85 | val += average_velocity ** 2 86 | val = val / (len(self.positions) - 1.0) 87 | return val 88 | 89 | def average_kinetic_energy_vertical(self, joint): 90 | val = 0 91 | for i in range(1, len(self.positions)): 92 | average_velocity = feat_utils.calc_average_velocity_vertical( 93 | self.positions, 94 | i, 95 | joint, 96 | self.sliding_window, 97 | self.frame_time, 98 | self.up_vec, 99 | ) 100 | val += average_velocity ** 2 101 | val = val / (len(self.positions) - 1.0) 102 | return val 103 | 104 | def average_energy_expenditure(self, joint): 105 | val = 0.0 106 | for i in range(1, len(self.positions)): 107 | val += feat_utils.calc_average_acceleration( 108 | self.positions, i, joint, self.sliding_window, self.frame_time 109 | ) 110 | val = val / (len(self.positions) - 1.0) 111 | return val 112 | -------------------------------------------------------------------------------- /aist_plusplus/features/manual.py: -------------------------------------------------------------------------------- 1 | # BSD License 2 | 3 | # For fairmotion software 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. 6 | # Modified by Ruilong Li 7 | 8 | # Redistribution and use in source and binary forms, with or without modification, 9 | # are permitted provided that the following conditions are met: 10 | 11 | # * Redistributions of source code must retain the above copyright notice, this 12 | # list of conditions and the following disclaimer. 13 | 14 | # * Redistributions in binary form must reproduce the above copyright notice, 15 | # this list of conditions and the following disclaimer in the documentation 16 | # and/or other materials provided with the distribution. 17 | 18 | # * Neither the name Facebook nor the names of its contributors may be used to 19 | # endorse or promote products derived from this software without specific 20 | # prior written permission. 
21 | 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 23 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 24 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 26 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 29 | # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | import numpy as np 33 | from . import utils as feat_utils 34 | 35 | 36 | SMPL_JOINT_NAMES = [ 37 | "root", 38 | "lhip", "rhip", "belly", 39 | "lknee", "rknee", "spine", 40 | "lankle", "rankle", "chest", 41 | "ltoes", "rtoes", "neck", 42 | "linshoulder", "rinshoulder", 43 | "head", "lshoulder", "rshoulder", 44 | "lelbow", "relbow", 45 | "lwrist", "rwrist", 46 | "lhand", "rhand", 47 | ] 48 | 49 | 50 | def extract_manual_features(positions): 51 | assert len(positions.shape) == 3 # (seq_len, n_joints, 3) 52 | features = [] 53 | f = ManualFeatures(positions) 54 | for _ in range(1, positions.shape[0]): 55 | pose_features = [] 56 | pose_features.append( 57 | f.f_nmove("neck", "rhip", "lhip", "rwrist", 1.8 * f.hl) 58 | ) 59 | pose_features.append( 60 | f.f_nmove("neck", "lhip", "rhip", "lwrist", 1.8 * f.hl) 61 | ) 62 | pose_features.append( 63 | f.f_nplane("chest", "neck", "neck", "rwrist", 0.2 * f.hl) 64 | ) 65 | pose_features.append( 66 | f.f_nplane("chest", "neck", "neck", "lwrist", 0.2 * f.hl) 67 | ) 68 | pose_features.append( 69 | f.f_move("belly", "chest", "chest", "rwrist", 1.8 * f.hl) 70 | ) 71 | pose_features.append( 72 | f.f_move("belly", "chest", "chest", "lwrist", 1.8 * f.hl) 73 | ) 74 | pose_features.append( 75 | f.f_angle("relbow", "rshoulder", "relbow", "rwrist", [0, 110]) 76 | ) 77 | pose_features.append( 78 | f.f_angle("lelbow", "lshoulder", "lelbow", "lwrist", [0, 110]) 79 | ) 80 | pose_features.append( 81 | f.f_nplane( 82 | "lshoulder", "rshoulder", "lwrist", "rwrist", 2.5 * f.sw 83 | ) 84 | ) 85 | pose_features.append( 86 | f.f_move("lwrist", "rwrist", "rwrist", "lwrist", 1.4 * f.hl) 87 | ) 88 | pose_features.append( 89 | f.f_move("rwrist", "root", "lwrist", "root", 1.4 * f.hl) 90 | ) 91 | pose_features.append( 92 | f.f_move("lwrist", "root", "rwrist", "root", 1.4 * f.hl) 93 | ) 94 | pose_features.append(f.f_fast("rwrist", 2.5 * f.hl)) 95 | pose_features.append(f.f_fast("lwrist", 2.5 * f.hl)) 96 | pose_features.append( 97 | f.f_plane("root", "lhip", "ltoes", "rankle", 0.38 * f.hl) 98 | ) 99 | pose_features.append( 100 | f.f_plane("root", "rhip", "rtoes", "lankle", 0.38 * f.hl) 101 | ) 102 | pose_features.append( 103 | f.f_nplane("zero", "y_unit", "y_min", "rankle", 1.2 * f.hl) 104 | ) 105 | pose_features.append( 106 | f.f_nplane("zero", "y_unit", "y_min", "lankle", 1.2 * f.hl) 107 | ) 108 | pose_features.append( 109 | f.f_nplane("lhip", "rhip", "lankle", "rankle", 2.1 * f.hw) 110 | ) 111 | pose_features.append( 112 | f.f_angle("rknee", "rhip", "rknee", "rankle", [0, 110]) 113 | ) 114 | pose_features.append( 115 | f.f_angle("lknee", "lhip", "lknee", "lankle", [0, 110]) 116 | ) 117 | pose_features.append(f.f_fast("rankle", 2.5 * f.hl)) 118 | 
pose_features.append(f.f_fast("lankle", 2.5 * f.hl)) 119 | pose_features.append( 120 | f.f_angle("neck", "root", "rshoulder", "relbow", [25, 180]) 121 | ) 122 | pose_features.append( 123 | f.f_angle("neck", "root", "lshoulder", "lelbow", [25, 180]) 124 | ) 125 | pose_features.append( 126 | f.f_angle("neck", "root", "rhip", "rknee", [50, 180]) 127 | ) 128 | pose_features.append( 129 | f.f_angle("neck", "root", "lhip", "lknee", [50, 180]) 130 | ) 131 | pose_features.append( 132 | f.f_plane("rankle", "neck", "lankle", "root", 0.5 * f.hl) 133 | ) 134 | pose_features.append( 135 | f.f_angle("neck", "root", "zero", "y_unit", [70, 110]) 136 | ) 137 | pose_features.append( 138 | f.f_nplane("zero", "minus_y_unit", "y_min", "rwrist", -1.2 * f.hl) 139 | ) 140 | pose_features.append( 141 | f.f_nplane("zero", "minus_y_unit", "y_min", "lwrist", -1.2 * f.hl) 142 | ) 143 | pose_features.append(f.f_fast("root", 2.3 * f.hl)) 144 | features.append(pose_features) 145 | f.next_frame() 146 | features = np.array(features, dtype=np.float32).mean(axis=0) 147 | return features 148 | 149 | 150 | class ManualFeatures: 151 | def __init__(self, positions, joint_names=SMPL_JOINT_NAMES): 152 | self.positions = positions 153 | self.joint_names = joint_names 154 | self.frame_num = 1 155 | 156 | # humerus length 157 | self.hl = feat_utils.distance_between_points( 158 | [1.99113488e-01, 2.36807942e-01, -1.80702247e-02], # "lshoulder", 159 | [4.54445392e-01, 2.21158922e-01, -4.10167128e-02], # "lelbow" 160 | ) 161 | # shoulder width 162 | self.sw = feat_utils.distance_between_points( 163 | [1.99113488e-01, 2.36807942e-01, -1.80702247e-02], # "lshoulder" 164 | [-1.91692337e-01, 2.36928746e-01, -1.23055102e-02,], # "rshoulder" 165 | ) 166 | # hip width 167 | self.hw = feat_utils.distance_between_points( 168 | [5.64076714e-02, -3.23069185e-01, 1.09197125e-02], # "lhip" 169 | [-6.24834076e-02, -3.31302464e-01, 1.50412619e-02], # "rhip" 170 | ) 171 | 172 | def next_frame(self): 173 | self.frame_num += 1 174 | 175 | def transform_and_fetch_position(self, j): 176 | if j == "y_unit": 177 | return [0, 1, 0] 178 | elif j == "minus_y_unit": 179 | return [0, -1, 0] 180 | elif j == "zero": 181 | return [0, 0, 0] 182 | elif j == "y_min": 183 | return [ 184 | 0, 185 | min( 186 | [y for (_, y, _) in self.positions[self.frame_num]] 187 | ), 188 | 0, 189 | ] 190 | return self.positions[self.frame_num][ 191 | self.joint_names.index(j) 192 | ] 193 | 194 | def transform_and_fetch_prev_position(self, j): 195 | return self.positions[self.frame_num - 1][ 196 | self.joint_names.index(j) 197 | ] 198 | 199 | def f_move(self, j1, j2, j3, j4, range): 200 | j1_prev, j2_prev, j3_prev, j4_prev = [ 201 | self.transform_and_fetch_prev_position(j) for j in [j1, j2, j3, j4] 202 | ] 203 | j1, j2, j3, j4 = [ 204 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 205 | ] 206 | return feat_utils.velocity_direction_above_threshold( 207 | j1, j1_prev, j2, j2_prev, j3, j3_prev, range 208 | ) 209 | 210 | def f_nmove(self, j1, j2, j3, j4, range): 211 | j1_prev, j2_prev, j3_prev, j4_prev = [ 212 | self.transform_and_fetch_prev_position(j) for j in [j1, j2, j3, j4] 213 | ] 214 | j1, j2, j3, j4 = [ 215 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 216 | ] 217 | return feat_utils.velocity_direction_above_threshold_normal( 218 | j1, j1_prev, j2, j3, j4, j4_prev, range 219 | ) 220 | 221 | def f_plane(self, j1, j2, j3, j4, threshold): 222 | j1, j2, j3, j4 = [ 223 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 224 | ] 225 | 
return feat_utils.distance_from_plane(j1, j2, j3, j4, threshold) 226 | 227 | def f_nplane(self, j1, j2, j3, j4, threshold): 228 | j1, j2, j3, j4 = [ 229 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 230 | ] 231 | return feat_utils.distance_from_plane_normal(j1, j2, j3, j4, threshold) 232 | 233 | def f_angle(self, j1, j2, j3, j4, range): 234 | j1, j2, j3, j4 = [ 235 | self.transform_and_fetch_position(j) for j in [j1, j2, j3, j4] 236 | ] 237 | return feat_utils.angle_within_range(j1, j2, j3, j4, range) 238 | 239 | def f_fast(self, j1, threshold): 240 | j1_prev = self.transform_and_fetch_prev_position(j1) 241 | j1 = self.transform_and_fetch_position(j1) 242 | return feat_utils.velocity_above_threshold(j1, j1_prev, threshold) 243 | -------------------------------------------------------------------------------- /aist_plusplus/features/utils.py: -------------------------------------------------------------------------------- 1 | # BSD License 2 | 3 | # For fairmotion software 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. 6 | 7 | # Redistribution and use in source and binary forms, with or without modification, 8 | # are permitted provided that the following conditions are met: 9 | 10 | # * Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | 13 | # * Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | 17 | # * Neither the name Facebook nor the names of its contributors may be used to 18 | # endorse or promote products derived from this software without specific 19 | # prior written permission. 20 | 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | import numpy as np 32 | 33 | 34 | def distance_between_points(a, b): 35 | return np.linalg.norm(np.array(a) - np.array(b)) 36 | 37 | 38 | def distance_from_plane(a, b, c, p, threshold): 39 | ba = np.array(b) - np.array(a) 40 | ca = np.array(c) - np.array(a) 41 | cross = np.cross(ca, ba) 42 | 43 | pa = np.array(p) - np.array(a) 44 | return np.dot(cross, pa) / np.linalg.norm(cross) > threshold 45 | 46 | 47 | def distance_from_plane_normal(n1, n2, a, p, threshold): 48 | normal = np.array(n2) - np.array(n1) 49 | pa = np.array(p) - np.array(a) 50 | return np.dot(normal, pa) / np.linalg.norm(normal) > threshold 51 | 52 | 53 | def angle_within_range(j1, j2, k1, k2, range): 54 | j = np.array(j2) - np.array(j1) 55 | k = np.array(k2) - np.array(k1) 56 | 57 | angle = np.arccos(np.dot(j, k) / (np.linalg.norm(j) * np.linalg.norm(k))) 58 | angle = np.degrees(angle) 59 | 60 | if angle > range[0] and angle < range[1]: 61 | return True 62 | else: 63 | return False 64 | 65 | 66 | def velocity_direction_above_threshold( 67 | j1, j1_prev, j2, j2_prev, p, p_prev, threshold, time_per_frame=1 / 120 68 | ): 69 | velocity = ( 70 | np.array(p) - np.array(j1) - (np.array(p_prev) - np.array(j1_prev)) 71 | ) 72 | direction = np.array(j2) - np.array(j1) 73 | 74 | velocity_along_direction = np.dot(velocity, direction) / np.linalg.norm( 75 | direction 76 | ) 77 | velocity_along_direction = velocity_along_direction / time_per_frame 78 | return velocity_along_direction > threshold 79 | 80 | 81 | def velocity_direction_above_threshold_normal( 82 | j1, j1_prev, j2, j3, p, p_prev, threshold, time_per_frame=1 / 120 83 | ): 84 | velocity = ( 85 | np.array(p) - np.array(j1) - (np.array(p_prev) - np.array(j1_prev)) 86 | ) 87 | j31 = np.array(j3) - np.array(j1) 88 | j21 = np.array(j2) - np.array(j1) 89 | direction = np.cross(j31, j21) 90 | 91 | velocity_along_direction = np.dot(velocity, direction) / np.linalg.norm( 92 | direction 93 | ) 94 | velocity_along_direction = velocity_along_direction / time_per_frame 95 | return velocity_along_direction > threshold 96 | 97 | 98 | def velocity_above_threshold(p, p_prev, threshold, time_per_frame=1 / 120): 99 | velocity = np.linalg.norm(np.array(p) - np.array(p_prev)) / time_per_frame 100 | return velocity > threshold 101 | 102 | 103 | def calc_average_velocity(positions, i, joint_idx, sliding_window, frame_time): 104 | current_window = 0 105 | average_velocity = np.zeros(len(positions[0][joint_idx])) 106 | for j in range(-sliding_window, sliding_window + 1): 107 | if i + j - 1 < 0 or i + j >= len(positions): 108 | continue 109 | average_velocity += ( 110 | positions[i + j][joint_idx] - positions[i + j - 1][joint_idx] 111 | ) 112 | current_window += 1 113 | return np.linalg.norm(average_velocity / (current_window * frame_time)) 114 | 115 | 116 | def calc_average_acceleration( 117 | positions, i, joint_idx, sliding_window, frame_time 118 | ): 119 | current_window = 0 120 | average_acceleration = np.zeros(len(positions[0][joint_idx])) 121 | for j in range(-sliding_window, sliding_window + 1): 122 | if i + j - 1 < 0 or i + j + 1 >= len(positions): 123 | continue 124 | v2 = ( 125 | positions[i + j + 1][joint_idx] - positions[i + j][joint_idx] 126 | ) / frame_time 127 | v1 = ( 128 | positions[i + j][joint_idx] 129 | - positions[i + j - 1][joint_idx] / frame_time 130 | ) 131 | average_acceleration += (v2 - v1) / frame_time 132 | current_window += 1 133 | return np.linalg.norm(average_acceleration / current_window) 134 | 135 | 136 | def calc_average_velocity_horizontal( 137 | positions, i, 
joint_idx, sliding_window, frame_time, up_vec="z" 138 | ): 139 | current_window = 0 140 | average_velocity = np.zeros(len(positions[0][joint_idx])) 141 | for j in range(-sliding_window, sliding_window + 1): 142 | if i + j - 1 < 0 or i + j >= len(positions): 143 | continue 144 | average_velocity += ( 145 | positions[i + j][joint_idx] - positions[i + j - 1][joint_idx] 146 | ) 147 | current_window += 1 148 | if up_vec == "y": 149 | average_velocity = np.array( 150 | [average_velocity[0], average_velocity[2]] 151 | ) / (current_window * frame_time) 152 | elif up_vec == "z": 153 | average_velocity = np.array( 154 | [average_velocity[0], average_velocity[1]] 155 | ) / (current_window * frame_time) 156 | else: 157 | raise NotImplementedError 158 | return np.linalg.norm(average_velocity) 159 | 160 | 161 | def calc_average_velocity_vertical( 162 | positions, i, joint_idx, sliding_window, frame_time, up_vec 163 | ): 164 | current_window = 0 165 | average_velocity = np.zeros(len(positions[0][joint_idx])) 166 | for j in range(-sliding_window, sliding_window + 1): 167 | if i + j - 1 < 0 or i + j >= len(positions): 168 | continue 169 | average_velocity += ( 170 | positions[i + j][joint_idx] - positions[i + j - 1][joint_idx] 171 | ) 172 | current_window += 1 173 | if up_vec == "y": 174 | average_velocity = np.array([average_velocity[1]]) / ( 175 | current_window * frame_time 176 | ) 177 | elif up_vec == "z": 178 | average_velocity = np.array([average_velocity[2]]) / ( 179 | current_window * frame_time 180 | ) 181 | else: 182 | raise NotImplementedError 183 | return np.linalg.norm(average_velocity) -------------------------------------------------------------------------------- /aist_plusplus/loader.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """AIST++ Dataset Loader.""" 16 | import json 17 | import os 18 | import pickle 19 | 20 | import aniposelib 21 | import numpy as np 22 | import cv2 23 | 24 | 25 | class AISTDataset: 26 | """A dataset class for loading, processing and plotting AIST++.""" 27 | 28 | VIEWS = ['c01', 'c02', 'c03', 'c04', 'c05', 'c06', 'c07', 'c08', 'c09'] 29 | 30 | def __init__(self, anno_dir): 31 | assert os.path.exists(anno_dir), f'Data does not exist at {anno_dir}!' 
32 | 33 | # Init paths 34 | self.camera_dir = os.path.join(anno_dir, 'cameras/') 35 | self.motion_dir = os.path.join(anno_dir, 'motions/') 36 | self.keypoint3d_dir = os.path.join(anno_dir, 'keypoints3d/') 37 | self.keypoint2d_dir = os.path.join(anno_dir, 'keypoints2d/') 38 | self.filter_file = os.path.join(anno_dir, 'ignore_list.txt') 39 | 40 | # Load environment setting mapping 41 | self.mapping_seq2env = {} # sequence name -> env name 42 | self.mapping_env2seq = {} # env name -> a list of sequence names 43 | env_mapping_file = os.path.join(self.camera_dir, 'mapping.txt') 44 | env_mapping = np.loadtxt(env_mapping_file, dtype=str) 45 | for seq_name, env_name in env_mapping: 46 | self.mapping_seq2env[seq_name] = env_name 47 | if env_name not in self.mapping_env2seq: 48 | self.mapping_env2seq[env_name] = [] 49 | self.mapping_env2seq[env_name].append(seq_name) 50 | 51 | @classmethod 52 | def get_video_name(cls, seq_name, view): 53 | """Get AIST video name from AIST++ sequence name.""" 54 | return seq_name.replace('cAll', view) 55 | 56 | @classmethod 57 | def get_seq_name(cls, video_name): 58 | """Get AIST++ sequence name from AIST video name.""" 59 | tags = video_name.split('_') 60 | if len(tags) == 3: 61 | view = tags[1] 62 | tags[1] = 'cAll' 63 | else: 64 | view = tags[2] 65 | tags[2] = 'cAll' 66 | return '_'.join(tags), view 67 | 68 | @classmethod 69 | def load_camera_group(cls, camera_dir, env_name): 70 | """Load a set of cameras in the environment.""" 71 | file_path = os.path.join(camera_dir, f'{env_name}.json') 72 | assert os.path.exists(file_path), f'File {file_path} does not exist!' 73 | with open(file_path, 'r') as f: 74 | params = json.load(f) 75 | cameras = [] 76 | for param_dict in params: 77 | camera = aniposelib.cameras.Camera(name=param_dict['name'], 78 | size=param_dict['size'], 79 | matrix=param_dict['matrix'], 80 | rvec=param_dict['rotation'], 81 | tvec=param_dict['translation'], 82 | dist=param_dict['distortions']) 83 | cameras.append(camera) 84 | camera_group = aniposelib.cameras.CameraGroup(cameras) 85 | return camera_group 86 | 87 | @classmethod 88 | def load_motion(cls, motion_dir, seq_name): 89 | """Load a motion sequence represented using SMPL format.""" 90 | file_path = os.path.join(motion_dir, f'{seq_name}.pkl') 91 | assert os.path.exists(file_path), f'File {file_path} does not exist!' 92 | with open(file_path, 'rb') as f: 93 | data = pickle.load(f) 94 | smpl_poses = data['smpl_poses'] # (N, 24, 3) 95 | smpl_scaling = data['smpl_scaling'] # (1,) 96 | smpl_trans = data['smpl_trans'] # (N, 3) 97 | return smpl_poses, smpl_scaling, smpl_trans 98 | 99 | @classmethod 100 | def load_keypoint3d(cls, keypoint_dir, seq_name, use_optim=False): 101 | """Load a 3D keypoint sequence represented using COCO format.""" 102 | file_path = os.path.join(keypoint_dir, f'{seq_name}.pkl') 103 | assert os.path.exists(file_path), f'File {file_path} does not exist!' 104 | with open(file_path, 'rb') as f: 105 | data = pickle.load(f) 106 | if use_optim: 107 | return data['keypoints3d_optim'] # (N, 17, 3) 108 | else: 109 | return data['keypoints3d'] # (N, 17, 3) 110 | 111 | @classmethod 112 | def load_keypoint2d(cls, keypoint_dir, seq_name): 113 | """Load a 2D keypoint sequence represented using COCO format.""" 114 | file_path = os.path.join(keypoint_dir, f'{seq_name}.pkl') 115 | assert os.path.exists(file_path), f'File {file_path} does not exist!' 
116 | with open(file_path, 'rb') as f: 117 | data = pickle.load(f) 118 | if 'det_scores' in data: 119 | keypoints2d = data['keypoints2d'] # (nviews, N, 17, 3) 120 | det_scores = data['det_scores'] # (nviews, N) 121 | timestamps = data['timestamps'] # (N,) 122 | return keypoints2d, det_scores, timestamps 123 | else: 124 | keypoints2d = data['keypoints2d'] # (nviews, nframes, (nsubjects, (133, 3))) 125 | bboxes = data['bboxes'] # (nviews, (nframes, (nsubjects, (5,)))) 126 | timestamps = data['timestamps'] # (nviews, (nframes,)) 127 | return keypoints2d, bboxes, timestamps 128 | 129 | @classmethod 130 | def load_frames(cls, video_path, frame_ids=None, fps=60): 131 | """Load a single or multiple frames from a video.""" 132 | if frame_ids is None: 133 | frame_ids = range(1e6) 134 | assert isinstance(frame_ids, list) 135 | if not os.path.exists(video_path): 136 | return None 137 | cap = cv2.VideoCapture(video_path) 138 | assert cap.isOpened(), "check if your opencv is installed with ffmpeg supported." 139 | 140 | images = [] 141 | for frame_id in frame_ids: 142 | sec = frame_id * 1.0 / fps 143 | cap.set(cv2.CAP_PROP_POS_MSEC, (sec * 1000)) 144 | success, image = cap.read() 145 | if not success: 146 | break 147 | images.append(image) 148 | 149 | if len(images) > 0: 150 | images = np.stack(images) 151 | else: 152 | images = None 153 | 154 | cap.release() 155 | return images 156 | -------------------------------------------------------------------------------- /aist_plusplus/utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Utils for AIST++ Dataset.""" 16 | import os 17 | 18 | import ffmpeg 19 | import numpy as np 20 | 21 | 22 | def ffmpeg_video_read(video_path, fps=None): 23 | """Video reader based on FFMPEG. 24 | 25 | This function supports setting fps for video reading. It is critical 26 | as AIST++ Dataset are constructed under exact 60 fps, while some of 27 | the AIST dance videos are not percisely 60 fps. 28 | 29 | Args: 30 | video_path: A video file. 31 | fps: Use specific fps for video reading. (optional) 32 | Returns: 33 | A `np.array` with the shape of [seq_len, height, width, 3] 34 | """ 35 | assert os.path.exists(video_path), f'{video_path} does not exist!' 
36 | try: 37 | probe = ffmpeg.probe(video_path) 38 | except ffmpeg.Error as e: 39 | print('stdout:', e.stdout.decode('utf8')) 40 | print('stderr:', e.stderr.decode('utf8')) 41 | raise e 42 | video_info = next(stream for stream in probe['streams'] 43 | if stream['codec_type'] == 'video') 44 | width = int(video_info['width']) 45 | height = int(video_info['height']) 46 | stream = ffmpeg.input(video_path) 47 | if fps: 48 | stream = ffmpeg.filter(stream, 'fps', fps=fps, round='down') 49 | stream = ffmpeg.output(stream, 'pipe:', format='rawvideo', pix_fmt='rgb24') 50 | out, _ = ffmpeg.run(stream, capture_stdout=True) 51 | out = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3]) 52 | return out.copy() 53 | 54 | 55 | def ffmpeg_video_write(data, video_path, fps=25): 56 | """Video writer based on FFMPEG. 57 | 58 | Args: 59 | data: A `np.array` with the shape of [seq_len, height, width, 3] 60 | video_path: A video file. 61 | fps: Use specific fps for video writing. (optional) 62 | """ 63 | assert len(data.shape) == 4, f'input shape is not valid! Got {data.shape}!' 64 | _, height, width, _ = data.shape 65 | os.makedirs(os.path.dirname(video_path), exist_ok=True) 66 | writer = ( 67 | ffmpeg 68 | .input('pipe:', framerate=fps, format='rawvideo', 69 | pix_fmt='rgb24', s='{}x{}'.format(width, height)) 70 | .output(video_path, pix_fmt='yuv420p') 71 | .overwrite_output() 72 | .run_async(pipe_stdin=True) 73 | ) 74 | for frame in data: 75 | writer.stdin.write(frame.astype(np.uint8).tobytes()) 76 | writer.stdin.close() 77 | 78 | 79 | def ffmpeg_video_to_images(video_path, image_dir, fps=None, ext=".jpg") -> None: 80 | """Video to images converter based on FFMPEG. 81 | 82 | This function supports setting fps for video reading. It is critical 83 | as AIST++ Dataset are constructed under exact 60 fps, while some of 84 | the AIST dance videos are not percisely 60 fps. 85 | 86 | Args: 87 | video_path: A video file. 88 | image_dir: A output directory to store the images. 89 | fps: Use specific fps for video reading. (optional) 90 | """ 91 | assert os.path.exists(video_path), f'{video_path} does not exist!' 92 | os.makedirs(image_dir, exist_ok=True) 93 | stream = ffmpeg.input(video_path) 94 | if fps: 95 | stream = ffmpeg.filter(stream, 'fps', fps=fps, round='down') 96 | stream = ffmpeg.output( 97 | stream, os.path.join(image_dir, '%08d' + ext), start_number=0) 98 | stream = ffmpeg.overwrite_output(stream) 99 | ffmpeg.run(stream, quiet=True) 100 | 101 | 102 | def unify_joint_mappings(dataset='openpose25'): 103 | """Unify different joint definations. 104 | 105 | Output unified defination: 106 | ['Nose', 107 | 'RShoulder', 'RElbow', 'RWrist', 108 | 'LShoulder', 'LElbow', 'LWrist', 109 | 'RHip', 'RKnee', 'RAnkle', 110 | 'LHip', 'LKnee', 'LAnkle', 111 | 'REye', 'LEye', 112 | 'REar', 'LEar', 113 | 'LBigToe', 'LHeel', 114 | 'RBigToe', 'RHeel',] 115 | 116 | Args: 117 | dataset: `openpose25`, `coco`(17) and `smpl`. 118 | Returns: 119 | a list of indexs that maps the joints to a unified defination. 
120 | """ 121 | if dataset == 'openpose25': 122 | return np.array([ 123 | 0, 124 | 2, 3, 4, 125 | 5, 6, 7, 126 | 9, 10, 11, 127 | 12, 13, 14, 128 | 15, 16, 129 | 17, 18, 130 | 19, 21, 131 | 22, 24, 132 | ], dtype=np.int32) 133 | elif dataset == 'smpl': 134 | # note SMPL needs to be "left-right flipped" to be consistent 135 | # with others 136 | return np.array([ 137 | 24, 138 | 16, 18, 20, 139 | 17, 19, 21, 140 | 1, 4, 7, 141 | 2, 5, 8, 142 | 26, 25, 143 | 28, 27, 144 | 32, 34, 145 | 29, 31, 146 | ], dtype=np.int32) 147 | elif dataset == 'coco': 148 | return np.array([ 149 | 0, 150 | 5, 7, 9, 151 | 6, 8, 10, 152 | 11, 13, 15, 153 | 12, 14, 16, 154 | 1, 2, 155 | 3, 4, 156 | ], dtype=np.int32) 157 | else: 158 | raise ValueError(f'{dataset} is not supported') 159 | 160 | -------------------------------------------------------------------------------- /aist_plusplus/visualizer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Visualize the AIST++ Dataset.""" 16 | 17 | from . import utils 18 | import cv2 19 | import numpy as np 20 | 21 | _COLORS = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], 22 | [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], 23 | [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], 24 | [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], 25 | [255, 0, 170], [255, 0, 85]] 26 | 27 | 28 | def plot_kpt(keypoint, canvas, color=None): 29 | for i, (x, y) in enumerate(keypoint[:, 0:2]): 30 | if np.isnan(x) or np.isnan(y) or x < 0 or y < 0: 31 | continue 32 | cv2.circle(canvas, (int(x), int(y)), 33 | 7, 34 | color if color is not None else _COLORS[i % len(_COLORS)], 35 | thickness=-1) 36 | return canvas 37 | 38 | 39 | def plot_on_video(keypoints2d, video_path, save_path, fps=60): 40 | assert len(keypoints2d.shape) == 3, ( 41 | f'Input shape is not valid! Got {keypoints2d.shape}') 42 | video = utils.ffmpeg_video_read(video_path, fps=fps) 43 | for iframe, keypoint in enumerate(keypoints2d): 44 | if iframe >= video.shape[0]: 45 | break 46 | video[iframe] = plot_kpt(keypoint, video[iframe]) 47 | utils.ffmpeg_video_write(video, save_path, fps=fps) 48 | 49 | 50 | -------------------------------------------------------------------------------- /assets/aist_pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/aistplusplus_api/2dd7b3e946b794fd0081c98e2e2433545abf8b87/assets/aist_pipeline.jpg -------------------------------------------------------------------------------- /demos/extract_motion_feats.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Demo code for motion feature extraction.""" 16 | from absl import app 17 | from absl import flags 18 | from aist_plusplus.loader import AISTDataset 19 | from aist_plusplus.features.kinetic import extract_kinetic_features 20 | from aist_plusplus.features.manual import extract_manual_features 21 | from smplx import SMPL 22 | import torch 23 | 24 | 25 | FLAGS = flags.FLAGS 26 | flags.DEFINE_string( 27 | 'anno_dir', 28 | '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/', 29 | 'input local dictionary for AIST++ annotations.') 30 | flags.DEFINE_string( 31 | 'smpl_dir', 32 | '/usr/local/google/home/ruilongli/data/SMPL/', 33 | 'input local dictionary that stores SMPL data.') 34 | flags.DEFINE_string( 35 | 'video_name', 36 | 'gWA_sFM_c01_d27_mWA2_ch21', 37 | 'input video name to be visualized.') 38 | 39 | 40 | def main(_): 41 | # Parsing data info. 42 | aist_dataset = AISTDataset(FLAGS.anno_dir) 43 | seq_name, view = AISTDataset.get_seq_name(FLAGS.video_name) 44 | 45 | # SMPL joints 46 | smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion( 47 | aist_dataset.motion_dir, seq_name) 48 | smpl = SMPL(model_path=FLAGS.smpl_dir, gender='MALE', batch_size=1) 49 | # Note here we calculate `transl` as `smpl_trans/smpl_scaling` for 50 | # normalizing the motion in generic SMPL model scale. 51 | keypoints3d = smpl.forward( 52 | global_orient=torch.from_numpy(smpl_poses[:, 0:1]).float(), 53 | body_pose=torch.from_numpy(smpl_poses[:, 1:]).float(), 54 | transl=torch.from_numpy(smpl_trans / smpl_scaling).float(), 55 | ).joints.detach().numpy() 56 | 57 | # extract features 58 | features_k = extract_kinetic_features(keypoints3d) 59 | print ("kinetic features:", features_k) 60 | features_m = extract_manual_features(keypoints3d) 61 | print ("manual features:", features_m) 62 | 63 | 64 | if __name__ == '__main__': 65 | app.run(main) 66 | 67 | -------------------------------------------------------------------------------- /demos/run_dyn_processing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | import glob 5 | 6 | from absl import app 7 | from absl import flags 8 | from aist_plusplus.loader import AISTDataset 9 | from aist_plusplus.utils import ffmpeg_video_to_images 10 | from smplx import SMPL 11 | import torch 12 | import imageio 13 | import numpy as np 14 | 15 | FLAGS = flags.FLAGS 16 | 17 | flags.DEFINE_list( 18 | 'sequence_names', 19 | "gBR_sBM_cAll_d04_mBR0_ch01", 20 | 'list of sequence names to be processed. 
None means to process all.') 21 | flags.DEFINE_string( 22 | 'anno_dir', 23 | '/home/ruilongli/data/AIST++/', 24 | 'input local dictionary for AIST++ annotations.') 25 | flags.DEFINE_string( 26 | 'smpl_dir', 27 | '/home/ruilongli/data/smpl_model/smpl/', 28 | 'input local dictionary that stores SMPL data.') 29 | flags.DEFINE_string( 30 | 'video_dir', 31 | '/home/ruilongli/data/AIST/videos/10M/', 32 | 'input local dictionary for AIST Dance Videos.') 33 | flags.DEFINE_string( 34 | 'video_alpha_dir', 35 | '/home/ruilongli/data/AIST++/segmentation/', 36 | 'output local dictionary that stores AIST++ segmentation masks.') 37 | flags.DEFINE_string( 38 | 'output_dir', 39 | '/home/ruilongli/data/AIST++_dyn', 40 | 'output local dictionary that stores AIST images.') 41 | 42 | 43 | def main(_): 44 | aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir) 45 | 46 | for seq_name in FLAGS.sequence_names: 47 | output_dir = os.path.join(FLAGS.output_dir, seq_name) 48 | 49 | # split images & masks 50 | for view in AISTDataset.VIEWS: 51 | video_name = AISTDataset.get_video_name(seq_name, view) 52 | logging.info("processing %s" % video_name) 53 | 54 | video_file = os.path.join(FLAGS.video_dir, video_name + ".mp4") 55 | image_dir = os.path.join(output_dir, "images", view) 56 | os.makedirs(image_dir, exist_ok=True) 57 | ffmpeg_video_to_images(video_file, image_dir, fps=60, ext=".jpg") 58 | 59 | video_file = os.path.join(FLAGS.video_alpha_dir, video_name + "_alpha1.mp4") 60 | image_dir = os.path.join(output_dir, "alpha1", view) 61 | os.makedirs(image_dir, exist_ok=True) 62 | ffmpeg_video_to_images(video_file, image_dir, fps=60, ext=".png") 63 | 64 | video_file = os.path.join(FLAGS.video_alpha_dir, video_name + "_alpha2.mp4") 65 | image_dir = os.path.join(output_dir, "alpha2", view) 66 | os.makedirs(image_dir, exist_ok=True) 67 | ffmpeg_video_to_images(video_file, image_dir, fps=60, ext=".png") 68 | 69 | # camera data 70 | env_name = aist_dataset.mapping_seq2env[seq_name] 71 | cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name) 72 | camera_data = cgroup.get_dicts() 73 | with open(os.path.join(output_dir, "camera.json"), "w") as fp: 74 | json.dump(camera_data, fp) 75 | 76 | # pose data 77 | pose_data = {} 78 | 79 | smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion( 80 | aist_dataset.motion_dir, seq_name) 81 | smpl_poses = torch.from_numpy(smpl_poses).float() 82 | smpl_scaling = torch.from_numpy(smpl_scaling).float() 83 | smpl_trans = torch.from_numpy(smpl_trans).float() 84 | 85 | smpl = SMPL(model_path=FLAGS.smpl_dir, gender='MALE', batch_size=1) 86 | with torch.no_grad(): 87 | rest_output, rest_transforms = smpl.forward( 88 | scaling=smpl_scaling.reshape(1, 1), 89 | ) 90 | pose_data["rest_joints"] = rest_output.joints.squeeze(0)[:24] 91 | pose_data["rest_verts"] = rest_output.vertices.squeeze(0) 92 | pose_data["rest_tfs"] = rest_transforms.squeeze(0) 93 | 94 | with torch.no_grad(): 95 | pose_output, pose_transforms = smpl.forward( 96 | global_orient=smpl_poses[:, 0:1], 97 | body_pose=smpl_poses[:, 1:], 98 | transl=smpl_trans, 99 | scaling=smpl_scaling.reshape(1, 1), 100 | ) 101 | pose_data["joints"] = pose_output.joints[:, :24] 102 | pose_data["verts"] = pose_output.vertices 103 | pose_data["tfs"] = pose_transforms 104 | pose_data["params"] = torch.cat( 105 | [smpl_poses, smpl_trans, smpl_scaling.expand(smpl_poses.shape[0], 1)], 106 | dim=-1 107 | ) 108 | for key, value in pose_data.items(): 109 | print (key, value.shape) 110 | 111 | torch.save(pose_data, os.path.join(output_dir, 
"pose_data.pt")) 112 | 113 | # post process alpha1 & alpha2 to trimap mask 114 | for view in AISTDataset.VIEWS: 115 | video_name = AISTDataset.get_video_name(seq_name, view) 116 | logging.info("processing %s" % video_name) 117 | image_dir = os.path.join(output_dir, "images", view) 118 | image_files = sorted(glob.glob(os.path.join(image_dir, "*.jpg"))) 119 | alpha1_dir = os.path.join(output_dir, "alpha1", view) 120 | alpha1_files = sorted(glob.glob(os.path.join(alpha1_dir, "*.png"))) 121 | alpha2_dir = os.path.join(output_dir, "alpha2", view) 122 | alpha2_files = sorted(glob.glob(os.path.join(alpha2_dir, "*.png"))) 123 | mask_dir = os.path.join(output_dir, "mask", view) 124 | os.makedirs(mask_dir, exist_ok=True) 125 | 126 | for image_file, alpha1_file, alpha2_file in zip( 127 | image_files, alpha1_files, alpha2_files 128 | ): 129 | image = imageio.imread(image_file) 130 | alpha1 = imageio.imread(alpha1_file) / 255.0 131 | alpha2 = imageio.imread(alpha2_file) / 255.0 132 | fg_mask = (alpha1 > 0.5) & (alpha2 > 0.5) 133 | bg_mask = (alpha1 < 0.5) & (alpha2 < 0.5) 134 | mask = np.zeros_like(image) 135 | mask[fg_mask] = 255 136 | mask[bg_mask] = 0 137 | mask[~ (fg_mask | bg_mask)] = 128 138 | imageio.imwrite( 139 | os.path.join(mask_dir, os.path.basename(alpha1_file)), mask) 140 | 141 | if __name__ == '__main__': 142 | app.run(main) 143 | -------------------------------------------------------------------------------- /demos/run_openpose_pipeline.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | SEQUENCE_NAME=gBR_sBM_cAll_d04_mBR0_ch01 4 | 5 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_openpose.py --sequence_names=${SEQUENCE_NAME} 6 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_preprocessing.py --sequence_names=${SEQUENCE_NAME} 7 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_estimate_keypoints.py --sequence_names=${SEQUENCE_NAME} 8 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_estimate_smpl.py --sequence_names=${SEQUENCE_NAME} 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python processing/run_segmentation.py --sequence_names=${SEQUENCE_NAME} -------------------------------------------------------------------------------- /demos/run_vis.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Demo code for running visualizer.""" 16 | import os 17 | 18 | from absl import app 19 | from absl import flags 20 | from aist_plusplus.loader import AISTDataset 21 | from aist_plusplus.visualizer import plot_on_video 22 | from smplx import SMPL 23 | import torch 24 | 25 | FLAGS = flags.FLAGS 26 | flags.DEFINE_string( 27 | 'anno_dir', 28 | '/home/ruilongli/data/AIST++/', 29 | 'input local dictionary for AIST++ annotations.') 30 | flags.DEFINE_string( 31 | 'video_dir', 32 | '/home/ruilongli/data/AIST/videos/10M/', 33 | 'input local dictionary for AIST Dance Videos.') 34 | flags.DEFINE_string( 35 | 'smpl_dir', 36 | '/home/ruilongli/data/smpl_model/smpl', 37 | 'input local dictionary that stores SMPL data.') 38 | flags.DEFINE_string( 39 | 'video_name', 40 | 'gBR_sBM_c01_d04_mBR0_ch01', 41 | 'input video name to be visualized.') 42 | flags.DEFINE_string( 43 | 'save_dir', 44 | './', 45 | 'output local dictionary that stores AIST++ visualization.') 46 | flags.DEFINE_enum( 47 | 'mode', '2D', ['2D', '3D', 'SMPL', 'SMPLMesh'], 48 | 'visualize 3D or 2D keypoints, or SMPL joints on image plane.') 49 | 50 | 51 | def main(_): 52 | # Parsing data info. 53 | aist_dataset = AISTDataset(FLAGS.anno_dir) 54 | video_path = os.path.join(FLAGS.video_dir, f'{FLAGS.video_name}.mp4') 55 | seq_name, view = AISTDataset.get_seq_name(FLAGS.video_name) 56 | view_idx = AISTDataset.VIEWS.index(view) 57 | 58 | # Parsing keypoints. 59 | if FLAGS.mode == '2D': # raw keypoints detection results. 60 | keypoints2d, _, _ = AISTDataset.load_keypoint2d( 61 | aist_dataset.keypoint2d_dir, seq_name) 62 | keypoints2d = keypoints2d[view_idx, :, :, 0:2] 63 | 64 | elif FLAGS.mode == '3D': # 3D keypoints with temporal optimization. 65 | keypoints3d = AISTDataset.load_keypoint3d( 66 | aist_dataset.keypoint3d_dir, seq_name, use_optim=True) 67 | nframes, njoints, _ = keypoints3d.shape 68 | env_name = aist_dataset.mapping_seq2env[seq_name] 69 | cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name) 70 | keypoints2d = cgroup.project(keypoints3d) 71 | keypoints2d = keypoints2d.reshape(9, nframes, njoints, 2)[view_idx] 72 | 73 | elif FLAGS.mode == 'SMPL': # SMPL joints 74 | smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion( 75 | aist_dataset.motion_dir, seq_name) 76 | smpl = SMPL(model_path=FLAGS.smpl_dir, gender='MALE', batch_size=1) 77 | keypoints3d = smpl.forward( 78 | global_orient=torch.from_numpy(smpl_poses[:, 0:1]).float(), 79 | body_pose=torch.from_numpy(smpl_poses[:, 1:]).float(), 80 | transl=torch.from_numpy(smpl_trans).float(), 81 | scaling=torch.from_numpy(smpl_scaling.reshape(1, 1)).float(), 82 | ).joints.detach().numpy() 83 | 84 | nframes, njoints, _ = keypoints3d.shape 85 | env_name = aist_dataset.mapping_seq2env[seq_name] 86 | cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name) 87 | keypoints2d = cgroup.project(keypoints3d) 88 | keypoints2d = keypoints2d.reshape(9, nframes, njoints, 2)[view_idx] 89 | 90 | elif FLAGS.mode == 'SMPLMesh': # SMPL Mesh 91 | import trimesh # install by `pip install trimesh` 92 | import vedo # install by `pip install vedo` 93 | smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion( 94 | aist_dataset.motion_dir, seq_name) 95 | smpl = SMPL(model_path=FLAGS.smpl_dir, gender='MALE', batch_size=1) 96 | vertices = smpl.forward( 97 | global_orient=torch.from_numpy(smpl_poses[:, 0:1]).float(), 98 | body_pose=torch.from_numpy(smpl_poses[:, 1:]).float(), 99 | transl=torch.from_numpy(smpl_trans).float(), 100 | 
        scaling=torch.from_numpy(smpl_scaling.reshape(1, 1)).float(),
101 |     ).vertices.detach().numpy()[0]  # first frame
102 |     faces = smpl.faces
103 |     mesh = trimesh.Trimesh(vertices, faces)
104 |     mesh.visual.face_colors = [200, 200, 250, 100]
105 | 
106 |     keypoints3d = AISTDataset.load_keypoint3d(
107 |         aist_dataset.keypoint3d_dir, seq_name, use_optim=True)
108 |     pts = vedo.Points(keypoints3d[0], r=20)  # first frame
109 | 
110 |     vedo.show(mesh, pts, interactive=True)
111 |     exit()
112 | 
113 |   # Visualize.
114 |   os.makedirs(FLAGS.save_dir, exist_ok=True)
115 |   save_path = os.path.join(FLAGS.save_dir, f'{FLAGS.video_name}.mp4')
116 |   plot_on_video(keypoints2d, video_path, save_path, fps=60)
117 | 
118 | 
119 | if __name__ == '__main__':
120 |   app.run(main)
121 | 
122 | 
--------------------------------------------------------------------------------
/downloader.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 The Google AI Perception Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Download AIST++ videos from AIST Dance Video Database website.
16 | 
17 | Be aware: Before running this script to download the videos, you should have read
18 | the Terms of Use of the AIST Dance Video Database here:
19 | 
20 | https://aistdancedb.ongaaccel.jp/terms_of_use/
21 | """
22 | import argparse
23 | import multiprocessing
24 | import os
25 | import sys
26 | import urllib.request
27 | from functools import partial
28 | 
29 | SOURCE_URL = 'https://aistdancedb.ongaaccel.jp/v1.0.0/video/10M/'
30 | LIST_URL = 'https://storage.googleapis.com/aist_plusplus_public/20121228/video_list.txt'
31 | 
32 | def _download(video_url, download_folder):
33 |   save_path = os.path.join(download_folder, os.path.basename(video_url))
34 |   urllib.request.urlretrieve(video_url, save_path)
35 | 
36 | if __name__ == '__main__':
37 |   parser = argparse.ArgumentParser(
38 |       description='Scripts for downloading AIST++ videos.')
39 |   parser.add_argument(
40 |       '--download_folder',
41 |       type=str,
42 |       required=True,
43 |       help='where to store AIST++ videos.')
44 |   parser.add_argument(
45 |       '--num_processes',
46 |       type=int,
47 |       default=1,
48 |       help='number of processes for multiprocessing.')
49 |   args = parser.parse_args()
50 | 
51 |   ans = input(
52 |       "Before running this script, please make sure you have read the Terms of Use "
53 |       "of the AIST Dance Video Database here: \n"
54 |       "\n"
55 |       "https://aistdancedb.ongaaccel.jp/terms_of_use/\n"
56 |       "\n"
57 |       "Do you agree with the Terms of Use? [Y/N]"
58 |   )
59 |   if ans in ["Yes", "YES", "yes", "Y", "y"]:
60 |     pass
61 |   else:
62 |     print("Program exits.
Please first acknowledge the Terms of Use.")
63 |     exit()
64 | 
65 |   os.makedirs(args.download_folder, exist_ok=True)
66 | 
67 |   seq_names = urllib.request.urlopen(LIST_URL)
68 |   seq_names = [seq_name.strip().decode('utf-8') for seq_name in seq_names]
69 |   video_urls = [
70 |       os.path.join(SOURCE_URL, seq_name + '.mp4') for seq_name in seq_names]
71 | 
72 |   download_func = partial(_download, download_folder=args.download_folder)
73 |   pool = multiprocessing.Pool(processes=args.num_processes)
74 |   for i, _ in enumerate(pool.imap_unordered(download_func, video_urls)):
75 |     sys.stderr.write('\rdownloading %d / %d' % (i + 1, len(video_urls)))
76 |   sys.stderr.write('\ndone.\n')
77 | 
--------------------------------------------------------------------------------
/processing/requirements.txt:
--------------------------------------------------------------------------------
1 | vedo>=2020.4.2
2 | scipy>=1.3.1
--------------------------------------------------------------------------------
/processing/run_estimate_camera.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 The Google AI Perception Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Estimate AIST++ camera parameters."""
16 | import json
17 | import math
18 | import os
19 | import random
20 | 
21 | from absl import app
22 | from absl import flags
23 | from aist_plusplus.loader import AISTDataset
24 | import aniposelib
25 | import numpy as np
26 | import vedo
27 | import cv2
28 | from scipy.spatial.transform import Rotation as R
29 | 
30 | FLAGS = flags.FLAGS
31 | flags.DEFINE_string(
32 |     'anno_dir',
33 |     '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/',
34 |     'input local directory for AIST++ annotations.')
35 | flags.DEFINE_string(
36 |     'save_dir',
37 |     '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/cameras/',
38 |     'output local directory that stores AIST++ camera parameters.')
39 | flags.DEFINE_bool(
40 |     'visualize', False,
41 |     'Whether to visualize the cameras for debugging.')
42 | random.seed(0)
43 | np.random.seed(0)
44 | 
45 | 
46 | def plot_cameras(cgroup):
47 |   points_world = np.array([
48 |       [40., 0., 0.],  # arrow x: red
49 |       [0., 40., 0.],  # arrow y: green
50 |       [0., 0., 40.],  # arrow z: blue
51 |   ])
52 |   colors = ['r', 'g', 'b']
53 |   axes_all = [
54 |       vedo.Arrows([[0, 0, 0]], [points_world[i]]).c(colors[i])
55 |       for i in range(3)]
56 |   for camera in cgroup.cameras:
57 |     rot_mat = cv2.Rodrigues(camera.rvec)[0]
58 |     cam_center = - np.linalg.inv(rot_mat).dot(camera.tvec)
59 |     points_cam = np.einsum('ij,kj->ki', np.linalg.inv(rot_mat), points_world)
60 |     axes_all += [
61 |         vedo.Arrows([cam_center], [cam_center + points_cam[i]]).c(colors[i])
62 |         for i in range(3)]
63 |     axes_all += [vedo.Text(camera.name, cam_center, s=10)]
64 |   return axes_all
65 | 
66 | 
67 | def init_env_cameras():
68 |   """Manually initializes a rough estimate of the environment cameras."""
69 |   cams = []
70 |   for i, view in
enumerate(AISTDataset.VIEWS): 71 | f = 1600 72 | cx = 1920 // 2 73 | cy = 1080 // 2 74 | if view == 'c09': 75 | r1 = R.from_euler('y', 180, degrees=True) 76 | r2 = R.from_euler('z', 180, degrees=True) 77 | rvec = (r1 * r2).as_rotvec() 78 | tvec = [0, 170, 500] 79 | else: 80 | r1 = R.from_euler('y', 180 - 360 // 8 * i, degrees=True) 81 | r2 = R.from_euler('z', 180, degrees=True) 82 | rvec = (r1 * r2).as_rotvec() 83 | tvec = [0, 180, 500] 84 | 85 | matrix = np.array([ 86 | [f, 0, cx], 87 | [0, f, cy], 88 | [0, 0, 1], 89 | ], dtype=np.float32) 90 | cams.append( 91 | aniposelib.cameras.Camera( 92 | matrix=matrix, rvec=rvec, tvec=tvec, name=view, size=(1920, 1080))) 93 | cgroup = aniposelib.cameras.CameraGroup(cams) 94 | return cgroup 95 | 96 | 97 | def main(_): 98 | aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir) 99 | 100 | for env_name, seq_names in aist_dataset.mapping_env2seq.items(): 101 | # Init camera parameters 102 | cgroup = init_env_cameras() 103 | 104 | # Select a set of sequences for optimizing camera parameters. 105 | seq_names = random.choices(seq_names, k=20) 106 | 107 | # Load 2D keypoints 108 | keypoints2d_all = [] 109 | for seq_name in seq_names: 110 | keypoints2d_raw, _, _ = AISTDataset.load_keypoint2d( 111 | aist_dataset.keypoint2d_dir, seq_name=seq_name) 112 | # Special cases 113 | if seq_name == 'gBR_sBM_cAll_d04_mBR0_ch01': 114 | keypoints2d_raw[4] = np.nan # not synced view 115 | if seq_name == 'gJB_sBM_cAll_d07_mJB3_ch05': 116 | keypoints2d_raw[6] = np.nan # size 640x480 117 | keypoints2d_all.append(keypoints2d_raw) 118 | keypoints2d_all = np.concatenate(keypoints2d_all, axis=1) 119 | 120 | # Filter keypoints to select those best points 121 | kpt_thre = 0.5 122 | ignore_idxs = np.where(keypoints2d_all[:, :, :, 2] < kpt_thre) 123 | keypoints2d_all[ignore_idxs[0], ignore_idxs[1], ignore_idxs[2], :] = np.nan 124 | keypoints2d_all = keypoints2d_all[..., 0:2] 125 | 126 | # Apply bundle adjustment and dump the camera parameters 127 | nviews = keypoints2d_all.shape[0] 128 | cgroup.bundle_adjust_iter( 129 | keypoints2d_all.reshape(nviews, -1, 2), 130 | n_iters=20, 131 | n_samp_iter=500, 132 | n_samp_full=5000, 133 | verbose=True) 134 | os.makedirs(FLAGS.save_dir, exist_ok=True) 135 | camera_file = os.path.join(FLAGS.save_dir, f'{env_name}.json') 136 | with open(camera_file, 'w') as f: 137 | json.dump([camera.get_dict() for camera in cgroup.cameras], f) 138 | 139 | # visualize the world with one frame 140 | if FLAGS.visualize: 141 | print("seq_name:", seq_name) 142 | axes_all = plot_cameras(cgroup) 143 | keypoints3d = cgroup.triangulate( 144 | keypoints2d_all[:, 0].reshape(nviews, -1, 2) 145 | ).reshape(-1, 3) 146 | vedo.show( 147 | *axes_all, vedo.Points(keypoints3d, r=12), 148 | interactive=True, axes=True) 149 | vedo.clear() 150 | 151 | 152 | if __name__ == '__main__': 153 | app.run(main) 154 | -------------------------------------------------------------------------------- /processing/run_estimate_keypoints.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Estimate AIST++ 3D keypoints.""" 16 | import os 17 | import pickle 18 | 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | from aist_plusplus.loader import AISTDataset 23 | import numpy as np 24 | 25 | FLAGS = flags.FLAGS 26 | 27 | flags.DEFINE_list( 28 | 'sequence_names', 29 | None, 30 | 'list of sequence names to be processed. None means to process all.') 31 | flags.DEFINE_string( 32 | 'anno_dir', 33 | '/home/ruilongli/data/AIST++_openpose/', 34 | 'input local dictionary for AIST++ annotations.') 35 | flags.DEFINE_string( 36 | 'save_dir', 37 | '/home/ruilongli/data/AIST++_openpose/keypoints3d/', 38 | 'output local dictionary that stores AIST++ 3D keypoints.') 39 | flags.DEFINE_enum( 40 | 'data_type', 41 | 'openpose', 42 | ['internal', 'openpose'], 43 | 'Which openpose detector is being used.' 44 | ) 45 | 46 | np.random.seed(0) 47 | 48 | 49 | def main(_): 50 | aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir) 51 | 52 | if FLAGS.sequence_names: 53 | seq_names = FLAGS.sequence_names 54 | else: 55 | seq_names = aist_dataset.mapping_seq2env.keys() 56 | 57 | for seq_name in seq_names: 58 | logging.info('processing %s', seq_name) 59 | env_name = aist_dataset.mapping_seq2env[seq_name] 60 | 61 | # Load camera parameters 62 | cgroup = AISTDataset.load_camera_group(aist_dataset.camera_dir, env_name) 63 | 64 | # load 2D keypoints 65 | keypoints2d, det_scores, _ = AISTDataset.load_keypoint2d( 66 | aist_dataset.keypoint2d_dir, seq_name=seq_name) 67 | nviews, nframes, _, _ = keypoints2d.shape 68 | assert det_scores.shape[0] == nviews 69 | assert det_scores.shape[1] == nframes 70 | if seq_name == 'gBR_sBM_cAll_d04_mBR0_ch01': 71 | keypoints2d[4] = np.nan # not synced view 72 | if seq_name == 'gJB_sBM_cAll_d07_mJB3_ch05': 73 | keypoints2d[6] = np.nan # size 640x480 74 | 75 | # filter keypoints to select those best points 76 | kpt_thre = 0.15 77 | ignore_idxs = np.where(keypoints2d[:, :, :, 2] < kpt_thre) 78 | keypoints2d[ignore_idxs[0], ignore_idxs[1], ignore_idxs[2], :] = np.nan 79 | det_thre = 0.0 80 | ignore_idxs = np.where(det_scores < det_thre) 81 | keypoints2d[ignore_idxs[0], ignore_idxs[1], :, :] = np.nan 82 | keypoints2d = keypoints2d[:, :, :, 0:2] 83 | 84 | # 3D pose triangulation and temporal optimization. 
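    # Note: the `bones` lists below are joint-index pairs describing skeleton
    # edges. They are passed as `constraints` to `triangulate_optim`, which
    # (in this aniposelib fork) presumably penalizes bone-length variation
    # across frames in addition to the reprojection error.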
85 | if FLAGS.data_type == "internal": 86 | # COCO-format bone constrains 87 | bones = [ 88 | (5, 7), (7, 9), (6, 8), (8, 10), (11, 13), (13, 15), (12, 14), 89 | (14, 16), (0, 1), (0, 2), (1, 2), (0, 3), (0, 4), (3, 4), 90 | ] 91 | elif FLAGS.data_type == "openpose": 92 | # https://cmu-perceptual-computing-lab.github.io/openpose/web/html/doc/md_doc_02_output.html 93 | body_bones = np.array([ 94 | (0, 15), (0, 16), (15, 17), (16, 18), 95 | (0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), (1, 8), 96 | (8, 9), (9, 10), (10, 11), (11, 24), (11, 22), (11, 23), (22, 23), (23, 24), (24, 22), 97 | (8, 12), (12, 13), (13, 14), (14, 21), (14, 19), (14, 20), (19, 20), (20, 21), (21, 19) 98 | ]) 99 | bones = body_bones.tolist() 100 | # hand_bones = np.array([ 101 | # (0, 1), (1, 2), (2, 3), (3, 4), 102 | # (0, 5), (5, 6), (6, 7), (7, 8), 103 | # (0, 9), (9, 10), (10, 11), (11, 12), 104 | # (0, 13), (13, 14), (14, 15), (15, 16), 105 | # (0, 17), (17, 18), (18, 19), (19, 20) 106 | # ]) 107 | # bones = np.concatenate([ 108 | # body_bones, hand_bones + 25, hand_bones + 25 + 21]).tolist() 109 | else: 110 | raise ValueError(FLAGS.data_type) 111 | keypoints3d = cgroup.triangulate( 112 | keypoints2d.reshape(nviews, -1, 2) 113 | ).reshape(nframes, -1, 3) 114 | keypoints3d_optim = cgroup.triangulate_optim( 115 | keypoints2d, constraints=bones, verbose=True 116 | ).reshape(nframes, -1, 3) 117 | 118 | # Save to pkl 119 | os.makedirs(FLAGS.save_dir, exist_ok=True) 120 | keypoints_file = os.path.join(FLAGS.save_dir, f'{seq_name}.pkl') 121 | with open(keypoints_file, 'wb') as f: 122 | pickle.dump({ 123 | 'keypoints3d': keypoints3d, 124 | 'keypoints3d_optim': keypoints3d_optim, 125 | }, f, protocol=pickle.HIGHEST_PROTOCOL) 126 | 127 | 128 | if __name__ == '__main__': 129 | app.run(main) 130 | -------------------------------------------------------------------------------- /processing/run_estimate_smpl.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Estimate AIST++ SMPL-format Motion.""" 16 | import os 17 | import pickle 18 | 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | from aist_plusplus.loader import AISTDataset 23 | from aist_plusplus.utils import unify_joint_mappings 24 | import numpy as np 25 | from smplx import SMPL 26 | import torch 27 | 28 | try: 29 | import vedo, trimesh 30 | SUPPORT_VIS = True 31 | except: 32 | SUPPORT_VIS = False 33 | 34 | FLAGS = flags.FLAGS 35 | flags.DEFINE_list( 36 | 'sequence_names', 37 | None, 38 | 'list of sequence names to be processed. 
None means to process all.')
39 | flags.DEFINE_string(
40 |     'anno_dir',
41 |     '/home/ruilongli/data/AIST++_openpose/',
42 |     'input local directory for AIST++ annotations.')
43 | flags.DEFINE_string(
44 |     'smpl_dir',
45 |     '/home/ruilongli/data/smpl_model/smpl/',
46 |     'input local directory that stores SMPL data.')
47 | flags.DEFINE_string(
48 |     'save_dir',
49 |     '/home/ruilongli/data/AIST++_openpose/motions/',
50 |     'output local directory that stores AIST++ SMPL-format motion data.')
51 | flags.DEFINE_bool(
52 |     'visualize',
53 |     False,
54 |     'Whether to visualize the fitting process.')
55 | flags.DEFINE_enum(
56 |     'data_type',
57 |     'openpose',
58 |     ['internal', 'openpose'],
59 |     'Which openpose detector is being used.')
60 | np.random.seed(0)
61 | torch.manual_seed(0)
62 | 
63 | 
64 | class SMPLRegressor:
65 |   """SMPL fitting based on 3D keypoints."""
66 | 
67 |   def __init__(self, smpl_model_path, smpl_model_gender='MALE'):
68 |     # Fitting hyper-parameters
69 |     self.base_lr = 100.0
70 |     self.niter = 10000
71 |     self.metric = torch.nn.MSELoss()
72 |     self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
73 |     self.smpl_model_path = smpl_model_path
74 |     self.smpl_model_gender = smpl_model_gender
75 | 
76 |     # Mapping to unify joint definitions
77 |     self.joints_mapping_smpl = unify_joint_mappings(dataset='smpl')
78 | 
79 |   def get_optimizer(self, smpl, step, base_lr):
80 |     """Sets up the optimizer with a warm-up learning rate schedule."""
81 |     if step < 100:
82 |       optimizer = torch.optim.SGD([
83 |           {'params': [smpl.transl], 'lr': base_lr},
84 |           {'params': [smpl.scaling], 'lr': base_lr * 0.01},
85 |           {'params': [smpl.global_orient], 'lr': 0.0},
86 |           {'params': [smpl.body_pose], 'lr': 0.0},
87 |           {'params': [smpl.betas], 'lr': 0.0},
88 |       ])
89 | 
90 |     elif step < 400:
91 |       optimizer = torch.optim.SGD([
92 |           {'params': [smpl.transl], 'lr': base_lr},
93 |           {'params': [smpl.scaling], 'lr': base_lr * 0.01},
94 |           {'params': [smpl.global_orient], 'lr': base_lr * 0.001},
95 |           {'params': [smpl.body_pose], 'lr': 0.0},
96 |           {'params': [smpl.betas], 'lr': 0.0},
97 |       ])
98 | 
99 |     else:
100 |       optimizer = torch.optim.SGD([
101 |           {'params': [smpl.transl], 'lr': base_lr},
102 |           {'params': [smpl.scaling], 'lr': base_lr * 0.01},
103 |           {'params': [smpl.global_orient], 'lr': base_lr * 0.001},
104 |           {'params': [smpl.body_pose], 'lr': base_lr * 0.001},
105 |           {'params': [smpl.betas], 'lr': 0.0},
106 |       ])
107 |     return optimizer
108 | 
109 |   def fit(self, keypoints3d, dtype='coco', verbose=True):
110 |     """Run fitting to optimize the SMPL parameters."""
111 |     assert len(keypoints3d.shape) == 3, 'input shape should be [N, njoints, 3]'
112 |     mapping_target = unify_joint_mappings(dataset=dtype)
113 |     keypoints3d = keypoints3d[:, mapping_target, :]
114 |     keypoints3d = torch.from_numpy(keypoints3d).float().to(self.device)
115 |     batch_size, njoints = keypoints3d.shape[0:2]
116 | 
117 |     # Init learnable smpl model
118 |     smpl = SMPL(
119 |         model_path=self.smpl_model_path,
120 |         gender=self.smpl_model_gender,
121 |         batch_size=batch_size).to(self.device)
122 | 
123 |     # Start fitting
124 |     for step in range(self.niter):
125 |       optimizer = self.get_optimizer(smpl, step, self.base_lr)
126 | 
127 |       output = smpl.forward()
128 |       joints = output.joints[:, self.joints_mapping_smpl[:njoints], :]
129 |       loss = self.metric(joints, keypoints3d)
130 | 
131 |       optimizer.zero_grad()
132 |       loss.backward()
133 |       optimizer.step()
134 | 
135 |       if verbose and step % 10 == 0:
136 |         logging.info(f'step {step:03d}; loss {loss.item():.3f};')
137 | 
138 |       if FLAGS.visualize:
139 |         vertices
= output.vertices[0].detach().cpu().numpy() # first frame 140 | mesh = trimesh.Trimesh(vertices, smpl.faces) 141 | mesh.visual.face_colors = [200, 200, 250, 100] 142 | pts = vedo.Points(keypoints3d[0].detach().cpu().numpy(), r=20) # first frame 143 | vedo.show(mesh, pts, interactive=False) 144 | 145 | # Return results 146 | return smpl, loss.item() 147 | 148 | 149 | def main(_): 150 | if FLAGS.visualize: 151 | assert SUPPORT_VIS, "--visualize is not support! Fail to import vedo or trimesh." 152 | 153 | aist_dataset = AISTDataset(FLAGS.anno_dir) 154 | smpl_regressor = SMPLRegressor(FLAGS.smpl_dir, 'MALE') 155 | 156 | if FLAGS.sequence_names: 157 | seq_names = FLAGS.sequence_names 158 | else: 159 | seq_names = aist_dataset.mapping_seq2env.keys() 160 | 161 | for seq_name in seq_names: 162 | logging.info('processing %s', seq_name) 163 | 164 | # load 3D keypoints 165 | keypoints3d = AISTDataset.load_keypoint3d( 166 | aist_dataset.keypoint3d_dir, seq_name, use_optim=True) 167 | 168 | # SMPL fitting 169 | if FLAGS.data_type == "internal": 170 | smpl, loss = smpl_regressor.fit(keypoints3d, dtype='coco', verbose=True) 171 | elif FLAGS.data_type == "openpose": 172 | smpl, loss = smpl_regressor.fit(keypoints3d, dtype='openpose25', verbose=True) 173 | else: 174 | raise ValueError(FLAGS.data_type) 175 | 176 | # One last time forward 177 | with torch.no_grad(): 178 | _ = smpl.forward() 179 | body_pose = smpl.body_pose.detach().cpu().numpy() 180 | global_orient = smpl.global_orient.detach().cpu().numpy() 181 | smpl_poses = np.concatenate([global_orient, body_pose], axis=1) 182 | smpl_scaling = smpl.scaling.detach().cpu().numpy() 183 | smpl_trans = smpl.transl.detach().cpu().numpy() 184 | 185 | os.makedirs(FLAGS.save_dir, exist_ok=True) 186 | motion_file = os.path.join(FLAGS.save_dir, f'{seq_name}.pkl') 187 | with open(motion_file, 'wb') as f: 188 | pickle.dump({ 189 | 'smpl_poses': smpl_poses, 190 | 'smpl_scaling': smpl_scaling, 191 | 'smpl_trans': smpl_trans, 192 | 'smpl_loss': loss, 193 | }, f, protocol=pickle.HIGHEST_PROTOCOL) 194 | 195 | 196 | if __name__ == '__main__': 197 | app.run(main) 198 | -------------------------------------------------------------------------------- /processing/run_openpose.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Detect frame-by-frame 2D keypoints using openpose.""" 16 | import os 17 | import logging 18 | 19 | from absl import app 20 | from absl import flags 21 | from aist_plusplus.loader import AISTDataset 22 | from aist_plusplus.utils import ffmpeg_video_to_images 23 | 24 | FLAGS = flags.FLAGS 25 | flags.DEFINE_list( 26 | 'sequence_names', 27 | None, 28 | 'list of sequence names to be processed. 
None means to process all.') 29 | flags.DEFINE_string( 30 | 'anno_dir', 31 | '/home/ruilongli/data/AIST++_openpose/', 32 | 'input local dictionary for AIST++ annotations.') 33 | flags.DEFINE_string( 34 | 'openpose_dir', 35 | '/home/ruilongli/workspace/openpose', 36 | 'input openpose repo that contains the executable.') 37 | flags.DEFINE_string( 38 | 'video_dir', 39 | '/home/ruilongli/data/AIST/videos/10M/', 40 | 'input local dictionary for AIST Dance Videos.') 41 | flags.DEFINE_string( 42 | 'image_save_dir', 43 | '/home/ruilongli/data/AIST/images/10M/', 44 | 'output local dictionary that stores AIST images.') 45 | flags.DEFINE_string( 46 | 'openpose_save_dir', 47 | '/home/ruilongli/data/AIST++_openpose/openpose', 48 | 'output local dictionary that stores AIST++ openpose results.') 49 | 50 | 51 | def main(_): 52 | os.makedirs(FLAGS.image_save_dir, exist_ok=True) 53 | os.makedirs(FLAGS.openpose_save_dir, exist_ok=True) 54 | 55 | if FLAGS.sequence_names: 56 | seq_names = FLAGS.sequence_names 57 | else: 58 | aist_dataset = AISTDataset(FLAGS.anno_dir) 59 | seq_names = aist_dataset.mapping_seq2env.keys() 60 | 61 | for seq_name in seq_names: 62 | for view in AISTDataset.VIEWS: 63 | video_name = AISTDataset.get_video_name(seq_name, view) 64 | video_file = os.path.join(FLAGS.video_dir, video_name + ".mp4") 65 | if not os.path.exists(video_file): 66 | continue 67 | logging.info('processing %s', video_file) 68 | 69 | # extract images 70 | image_dir = os.path.join(FLAGS.image_save_dir, video_name) 71 | ffmpeg_video_to_images(video_file, image_dir, fps=60) 72 | 73 | # extract keypoints 74 | save_dir = os.path.join(FLAGS.openpose_save_dir, video_name) 75 | os.system( 76 | "cd %s; " % FLAGS.openpose_dir + 77 | "./build/examples/openpose/openpose.bin " + 78 | "--image_dir %s " % image_dir + 79 | "--write_json %s " % save_dir + 80 | "--display 0 --hand --face --render_pose 0" 81 | ) 82 | 83 | if __name__ == '__main__': 84 | app.run(main) 85 | -------------------------------------------------------------------------------- /processing/run_preprocessing.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Process frame-by-frame keypoints detection results to pkl.""" 16 | import glob 17 | import json 18 | import multiprocessing 19 | import os 20 | import pickle 21 | 22 | from absl import app 23 | from absl import flags 24 | from absl import logging 25 | from aist_plusplus.loader import AISTDataset 26 | import numpy as np 27 | 28 | FLAGS = flags.FLAGS 29 | 30 | flags.DEFINE_list( 31 | 'sequence_names', 32 | None, 33 | 'list of sequence names to be processed. None means to process all.') 34 | flags.DEFINE_string( 35 | 'keypoints_dir', 36 | '/home/ruilongli/data/AIST++_openpose/openpose/', 37 | 'input local dictionary that stores 2D keypoints detection results in json.' 
38 | )
39 | flags.DEFINE_string(
40 |     'save_dir',
41 |     '/home/ruilongli/data/AIST++_openpose/keypoints2d/',
42 |     'output local directory that stores 2D keypoints detection results in pkl.'
43 | )
44 | flags.DEFINE_enum(
45 |     'data_type',
46 |     'openpose',
47 |     ['internal', 'openpose'],
48 |     'Which openpose detector is being used.'
49 | )
50 | 
51 | 
52 | def array_nan(shape, dtype=np.float32):
53 |   array = np.empty(shape, dtype=dtype)
54 |   array[:] = np.nan
55 |   return array
56 | 
57 | 
58 | def load_keypoints2d_file(file_path):
59 |   """Load 2D keypoints from a single keypoint detection result file.
60 | 
61 |   Only one person is extracted from the results. If there are multiple
62 |   persons in the prediction results, we select the one with the highest
63 |   detection score. The number of joints is inferred from `FLAGS.data_type`
64 |   (17 for `internal`, 25 for `openpose`).
65 | 
66 |   Args:
67 |     file_path: the json file path.
68 | 
69 |   Returns:
70 |     A `np.array` with the shape of [njoints, 3], and a detection score.
71 |   """
72 |   if FLAGS.data_type == "internal":
73 |     njoints = 17
74 |   elif FLAGS.data_type == "openpose":
75 |     njoints = 25
76 |   else:
77 |     raise ValueError(FLAGS.data_type)
78 | 
79 |   keypoint = array_nan((njoints, 3), dtype=np.float32)
80 |   det_score = 0.0
81 | 
82 |   try:
83 |     with open(file_path, 'r') as f:
84 |       data = json.load(f)
85 |   except Exception as e:  # pylint: disable=broad-except
86 |     logging.warning(e)
87 |     return keypoint, det_score
88 | 
89 |   if FLAGS.data_type == "internal":
90 |     keypoints = np.array(data['keypoints']).reshape((-1, njoints, 3))
91 |     det_scores = np.array(data['detection_scores'])
92 |   elif FLAGS.data_type == "openpose":
93 |     keypoints = []
94 |     for person in data["people"]:
95 |       # npoints: 25, 70, 21, 21
96 |       # for key in ["pose", "face", "hand_left", "hand_right"]:
97 |       for key in ["pose"]:
98 |         keypoints.extend(person["%s_keypoints_2d" % key])
99 |     keypoints = np.array(keypoints).reshape(len(data["people"]), -1, 3)
100 |     assert keypoints.shape[1] == njoints, (
101 |         "The shape is not right. %s vs. %d" % (str(keypoints.shape), njoints)
102 |     )
103 |     det_scores = np.mean(keypoints[:, 0:25, -1], axis=-1)
104 |   else:
105 |     raise ValueError(FLAGS.data_type)
106 | 
107 |   # The detection results may contain zero person or multiple people.
108 |   if det_scores.shape[0] == 0:
109 |     # There is no person in this image. We set this frame to NaN.
110 |     return keypoint, det_score
111 |   else:
112 |     # There is at least one person in this image. We select the one with
113 |     # the highest detection score.
114 |     idx = np.argmax(det_scores)
115 |     keypoint = keypoints[idx]
116 |     det_score = det_scores[idx]
117 |     return keypoint, det_score
118 | 
119 | 
120 | def load_keypoints2d(data_dir, seq_name):
121 |   """Load 2D keypoints predictions for a set of multi-view videos."""
122 |   # Parsing sequence name to multi-view video names
123 |   video_names = [AISTDataset.get_video_name(seq_name, view)
124 |                  for view in AISTDataset.VIEWS]
125 | 
126 |   # In case frames are missing, we first scan all views to get a union
127 |   # of timestamps.
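  # (Timestamps are parsed from the per-frame json filenames, which differ by
  # detector: '{video_name}_{ts}.json' for `internal` and
  # '{ts:08d}_keypoints.json' for `openpose`; see the path templates below.)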
128 | paths_cache = {} 129 | timestamps = [] 130 | for video_name in video_names: 131 | paths = sorted(glob.glob(os.path.join(data_dir, video_name, '*.json'))) 132 | paths_cache[video_name] = paths 133 | if FLAGS.data_type == "internal": 134 | timestamps += [ 135 | int(os.path.basename(p).split('.')[0].split('_')[-1]) for p in paths] 136 | elif FLAGS.data_type == "openpose": 137 | timestamps += [ 138 | int(os.path.basename(p).split('.')[0].split('_')[0]) for p in paths] 139 | else: 140 | raise ValueError(FLAGS.data_type) 141 | timestamps = np.array(sorted(list(set(timestamps)))) # (N,) 142 | 143 | # Then we load all frames according to timestamps. 144 | keypoints2d = [] 145 | det_scores = [] 146 | for video_name in video_names: 147 | if FLAGS.data_type == "internal": 148 | paths = [ 149 | os.path.join(data_dir, video_name, f'{video_name}_{ts}.json') 150 | for ts in timestamps 151 | ] 152 | elif FLAGS.data_type == "openpose": 153 | paths = [ 154 | os.path.join(data_dir, video_name, f'{ts:08d}_keypoints.json') 155 | for ts in timestamps 156 | ] 157 | else: 158 | raise ValueError(FLAGS.data_type) 159 | keypoints2d_per_view = [] 160 | det_scores_per_view = [] 161 | for path in paths: 162 | keypoint, det_score = load_keypoints2d_file(path) 163 | keypoints2d_per_view.append(keypoint) 164 | det_scores_per_view.append(det_score) 165 | keypoints2d.append(keypoints2d_per_view) 166 | det_scores.append(det_scores_per_view) 167 | 168 | keypoints2d = np.array( 169 | keypoints2d, dtype=np.float32) # (nviews, N, njoints, 3) 170 | det_scores = np.array( 171 | det_scores, dtype=np.float32) # (nviews, N) 172 | return keypoints2d, det_scores, timestamps 173 | 174 | 175 | def process_and_save(seq_name): 176 | keypoints2d, det_scores, timestamps = load_keypoints2d( 177 | FLAGS.keypoints_dir, seq_name=seq_name) 178 | os.makedirs(FLAGS.save_dir, exist_ok=True) 179 | save_path = os.path.join(FLAGS.save_dir, f'{seq_name}.pkl') 180 | with open(save_path, 'wb') as f: 181 | pickle.dump({ 182 | 'keypoints2d': keypoints2d, 183 | 'det_scores': det_scores, 184 | 'timestamps': timestamps, 185 | }, f, protocol=pickle.HIGHEST_PROTOCOL) 186 | 187 | 188 | def main(_): 189 | if FLAGS.sequence_names: 190 | seq_names = FLAGS.sequence_names 191 | else: 192 | aist_dataset = AISTDataset(FLAGS.anno_dir) 193 | seq_names = aist_dataset.mapping_seq2env.keys() 194 | 195 | pool = multiprocessing.Pool(16) 196 | pool.map(process_and_save, seq_names) 197 | 198 | 199 | if __name__ == '__main__': 200 | app.run(main) 201 | 202 | -------------------------------------------------------------------------------- /processing/run_segmentation.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
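# Illustrative usage (mirrors demos/run_openpose_pipeline.sh; the default flag
# values below are machine-specific and usually need to be overridden):
#   python processing/run_segmentation.py --sequence_names=gBR_sBM_cAll_d04_mBR0_ch01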
15 | """Estimate foreground masks.""" 16 | import os 17 | 18 | from absl import app 19 | from absl import flags 20 | from absl import logging 21 | from aist_plusplus.loader import AISTDataset 22 | from aist_plusplus.utils import ffmpeg_video_read 23 | import numpy as np 24 | import torch 25 | import imageio 26 | import tqdm 27 | 28 | FLAGS = flags.FLAGS 29 | flags.DEFINE_list( 30 | 'sequence_names', 31 | None, 32 | 'list of sequence names to be processed. None means to process all.') 33 | flags.DEFINE_string( 34 | 'anno_dir', 35 | '/home/ruilongli/data/AIST++_openpose/', 36 | 'input local dictionary for AIST++ annotations.') 37 | flags.DEFINE_string( 38 | 'video_dir', 39 | '/home/ruilongli/data/AIST/videos/10M/', 40 | 'input local dictionary for AIST Dance Videos.') 41 | flags.DEFINE_string( 42 | 'save_dir', 43 | '/home/ruilongli/data/AIST++_openpose/segmentation/', 44 | 'output local dictionary that stores AIST++ segmentation masks.') 45 | np.random.seed(0) 46 | 47 | 48 | def estimate_background(input_video: str, alpha_video: str, output_image: str): 49 | video_reader = imageio.get_reader(input_video) 50 | alpha_reader = imageio.get_reader(alpha_video) 51 | background, weights = 0, 0 52 | for img, alpha in tqdm.tqdm(zip(video_reader, alpha_reader)): 53 | weight = (1 - np.float32(alpha) / 255.0) 54 | weights += weight 55 | background += np.float32(img) * weight 56 | background /= (weights + 1e-8) 57 | imageio.imwrite(output_image, np.uint8(background)) 58 | 59 | 60 | def main(_): 61 | # Here we use https://github.com/PeterL1n/RobustVideoMatting (GPL-3.0 License) 62 | # to get an initial alpha matting prediction. 63 | model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50").cuda() 64 | converter = torch.hub.load("PeterL1n/RobustVideoMatting", "converter") 65 | 66 | # Here we use https://github.com/PeterL1n/BackgroundMattingV2 (MIT License) 67 | # to get an accurate alpha matting. 68 | if not os.path.exists("/tmp/model.pth"): 69 | os.system("gdown https://drive.google.com/uc?id=1ErIAsB_miVhYL9GDlYUmfbqlV293mSYf -O /tmp/model.pth -q") 70 | if not os.path.exists("/tmp/BackgroundMattingV2"): 71 | os.system("cd /tmp/; git clone -q https://github.com/PeterL1n/BackgroundMattingV2") 72 | 73 | if FLAGS.sequence_names: 74 | seq_names = FLAGS.sequence_names 75 | else: 76 | aist_dataset = AISTDataset(FLAGS.anno_dir) 77 | seq_names = aist_dataset.mapping_seq2env.keys() 78 | 79 | os.makedirs(FLAGS.save_dir, exist_ok=True) 80 | for seq_name in seq_names: 81 | for view in AISTDataset.VIEWS: 82 | video_name = AISTDataset.get_video_name(seq_name, view) 83 | video_file = os.path.join(FLAGS.video_dir, video_name + ".mp4") 84 | if not os.path.exists(video_file): 85 | continue 86 | 87 | # step 1. initial alpha matting prediction (not accurate enough). 88 | logging.info('processing %s', video_file) 89 | alpha_file = os.path.join(FLAGS.save_dir, video_name + "_alpha1.mp4") 90 | if not os.path.exists(alpha_file): 91 | converter( 92 | model, # The loaded model, can be on any device (cpu or cuda). 93 | input_source=video_file, # A video file or an image sequence directory. 94 | downsample_ratio=None, # [Optional] If None, make downsampled max size be 512px. 95 | output_type='video', # Choose "video" or "png_sequence" 96 | output_alpha=alpha_file, # [Optional] Output the raw alpha prediction. 97 | output_video_mbps=4, # Output video mbps. Not needed for png sequence. 98 | seq_chunk=12, # Process n frames at once for better parallelism. 99 | num_workers=1, # Only for image sequence input. Reader threads. 
100 | progress=True # Print conversion progress. 101 | ) 102 | 103 | # step 2. estimate the background image from the inital alpha matting prediction. 104 | background_file = os.path.join(FLAGS.save_dir, video_name + "_bg.png") 105 | if not os.path.exists(background_file): 106 | estimate_background(video_file, alpha_file, background_file) 107 | 108 | # step 3. estimate the more accurate alpha matting. 109 | final_file = os.path.join(FLAGS.save_dir, video_name + "_alpha2") 110 | if not os.path.exists(final_file): 111 | os.system( 112 | "cd /tmp/BackgroundMattingV2/; " + 113 | "python inference_video.py " + 114 | "--model-type mattingrefine " + 115 | "--model-backbone resnet50 " + 116 | "--model-backbone-scale 0.25 " + 117 | "--model-refine-mode sampling " + 118 | "--model-refine-sample-pixels 80000 " + 119 | "--model-checkpoint '/tmp/model.pth' " + 120 | "--video-src '%s' " % video_file + 121 | "--video-bgr '%s' " % background_file + 122 | "--output-dir '%s' " % final_file + 123 | "--output-type pha" 124 | ) 125 | if os.path.exists(final_file): 126 | os.system("mv %s/pha.mp4 %s.mp4; rm -rf %s" % (final_file, final_file, final_file)) 127 | 128 | 129 | if __name__ == '__main__': 130 | app.run(main) 131 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.9.0 2 | numpy 3 | torch 4 | torchvision 5 | opencv-python 6 | git+https://github.com/liruilong940607/aniposelib 7 | git+https://github.com/liruilong940607/smplx 8 | ffmpeg-python 9 | imageio 10 | imageio-ffmpeg 11 | gdown -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Perception Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import setuptools 16 | 17 | INSTALL_REQUIREMENTS = [ 18 | 'absl-py', 'numpy', 'opencv-python', 'ffmpeg-python'] 19 | 20 | setuptools.setup( 21 | name='aist_plusplus_api', 22 | url='https://github.com/google/aistplusplus_api', 23 | description='API for supporting AIST++ Dataset.', 24 | version='1.1.0', 25 | author='Ruilong Li', 26 | author_email='ruilongli94@gmail.com', 27 | packages=setuptools.find_packages(), 28 | install_requires=INSTALL_REQUIREMENTS 29 | ) 30 | --------------------------------------------------------------------------------
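A minimal end-to-end usage sketch (illustrative only; it assumes the package has been installed, e.g. with `pip install -e .` using the setup.py above, and that the annotation directory follows the layout the demos above expect):

    from aist_plusplus.loader import AISTDataset

    aist_dataset = AISTDataset('/path/to/aist_plusplus_final/')
    seq_name, view = AISTDataset.get_seq_name('gBR_sBM_c01_d04_mBR0_ch01')
    smpl_poses, smpl_scaling, smpl_trans = AISTDataset.load_motion(
        aist_dataset.motion_dir, seq_name)
    keypoints3d = AISTDataset.load_keypoint3d(
        aist_dataset.keypoint3d_dir, seq_name, use_optim=True)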