├── .gitignore ├── LICENSE ├── README.md ├── config ├── kinetics-skeleton │ ├── test_bone.yaml │ ├── test_joint.yaml │ ├── train_bone.yaml │ └── train_joint.yaml ├── nturgbd-cross-subject │ ├── test_bone.yaml │ ├── test_joint.yaml │ ├── train_bone.yaml │ ├── train_joint.yaml │ └── train_joint_aagcn.yaml └── nturgbd-cross-view │ ├── test_bone.yaml │ ├── test_joint.yaml │ ├── train_bone.yaml │ └── train_joint.yaml ├── data └── nturgbd_raw │ └── samples_with_missing_skeletons.txt ├── data_gen ├── __init__.py ├── gen_bone_data.py ├── gen_motion_data.py ├── kinetics_gendata.py ├── merge_joint_bone_data.py ├── ntu_gendata.py ├── preprocess.py └── rotation.py ├── ensemble.py ├── feeders ├── __init__.py ├── feeder.py └── tools.py ├── graph ├── __init__.py ├── kinetics.py ├── ntu_rgb_d.py └── tools.py ├── main.py └── model ├── __init__.py ├── aagcn.py └── agcn.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | 3 | .idea/ 4 | runs/ 5 | work_dir/ 6 | 7 | ### Python template 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. 
Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial 4.0 International Public 58 | License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial 4.0 International Public License ("Public 63 | License"). To the extent this Public License may be interpreted as a 64 | contract, You are granted the Licensed Rights in consideration of Your 65 | acceptance of these terms and conditions, and the Licensor grants You 66 | such rights in consideration of benefits the Licensor receives from 67 | making the Licensed Material available under these terms and 68 | conditions. 69 | 70 | Section 1 -- Definitions. 71 | 72 | a. 
Adapted Material means material subject to Copyright and Similar 73 | Rights that is derived from or based upon the Licensed Material 74 | and in which the Licensed Material is translated, altered, 75 | arranged, transformed, or otherwise modified in a manner requiring 76 | permission under the Copyright and Similar Rights held by the 77 | Licensor. For purposes of this Public License, where the Licensed 78 | Material is a musical work, performance, or sound recording, 79 | Adapted Material is always produced where the Licensed Material is 80 | synched in timed relation with a moving image. 81 | 82 | b. Adapter's License means the license You apply to Your Copyright 83 | and Similar Rights in Your contributions to Adapted Material in 84 | accordance with the terms and conditions of this Public License. 85 | 86 | c. Copyright and Similar Rights means copyright and/or similar rights 87 | closely related to copyright including, without limitation, 88 | performance, broadcast, sound recording, and Sui Generis Database 89 | Rights, without regard to how the rights are labeled or 90 | categorized. For purposes of this Public License, the rights 91 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 92 | Rights. 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. NonCommercial means not primarily intended for or directed towards 116 | commercial advantage or monetary compensation. For purposes of 117 | this Public License, the exchange of the Licensed Material for 118 | other material subject to Copyright and Similar Rights by digital 119 | file-sharing or similar means is NonCommercial provided there is 120 | no payment of monetary compensation in connection with the 121 | exchange. 122 | 123 | j. Share means to provide material to the public by any means or 124 | process that requires permission under the Licensed Rights, such 125 | as reproduction, public display, public performance, distribution, 126 | dissemination, communication, or importation, and to make material 127 | available to the public including in ways that members of the 128 | public may access the material from a place and at a time 129 | individually chosen by them. 130 | 131 | k. Sui Generis Database Rights means rights other than copyright 132 | resulting from Directive 96/9/EC of the European Parliament and of 133 | the Council of 11 March 1996 on the legal protection of databases, 134 | as amended and/or succeeded, as well as other essentially 135 | equivalent rights anywhere in the world. 
136 | 137 | l. You means the individual or entity exercising the Licensed Rights 138 | under this Public License. Your has a corresponding meaning. 139 | 140 | Section 2 -- Scope. 141 | 142 | a. License grant. 143 | 144 | 1. Subject to the terms and conditions of this Public License, 145 | the Licensor hereby grants You a worldwide, royalty-free, 146 | non-sublicensable, non-exclusive, irrevocable license to 147 | exercise the Licensed Rights in the Licensed Material to: 148 | 149 | a. reproduce and Share the Licensed Material, in whole or 150 | in part, for NonCommercial purposes only; and 151 | 152 | b. produce, reproduce, and Share Adapted Material for 153 | NonCommercial purposes only. 154 | 155 | 2. Exceptions and Limitations. For the avoidance of doubt, where 156 | Exceptions and Limitations apply to Your use, this Public 157 | License does not apply, and You do not need to comply with 158 | its terms and conditions. 159 | 160 | 3. Term. The term of this Public License is specified in Section 161 | 6(a). 162 | 163 | 4. Media and formats; technical modifications allowed. The 164 | Licensor authorizes You to exercise the Licensed Rights in 165 | all media and formats whether now known or hereafter created, 166 | and to make technical modifications necessary to do so. The 167 | Licensor waives and/or agrees not to assert any right or 168 | authority to forbid You from making technical modifications 169 | necessary to exercise the Licensed Rights, including 170 | technical modifications necessary to circumvent Effective 171 | Technological Measures. For purposes of this Public License, 172 | simply making modifications authorized by this Section 2(a) 173 | (4) never produces Adapted Material. 174 | 175 | 5. Downstream recipients. 176 | 177 | a. Offer from the Licensor -- Licensed Material. Every 178 | recipient of the Licensed Material automatically 179 | receives an offer from the Licensor to exercise the 180 | Licensed Rights under the terms and conditions of this 181 | Public License. 182 | 183 | b. No downstream restrictions. You may not offer or impose 184 | any additional or different terms or conditions on, or 185 | apply any Effective Technological Measures to, the 186 | Licensed Material if doing so restricts exercise of the 187 | Licensed Rights by any recipient of the Licensed 188 | Material. 189 | 190 | 6. No endorsement. Nothing in this Public License constitutes or 191 | may be construed as permission to assert or imply that You 192 | are, or that Your use of the Licensed Material is, connected 193 | with, or sponsored, endorsed, or granted official status by, 194 | the Licensor or others designated to receive attribution as 195 | provided in Section 3(a)(1)(A)(i). 196 | 197 | b. Other rights. 198 | 199 | 1. Moral rights, such as the right of integrity, are not 200 | licensed under this Public License, nor are publicity, 201 | privacy, and/or other similar personality rights; however, to 202 | the extent possible, the Licensor waives and/or agrees not to 203 | assert any such rights held by the Licensor to the limited 204 | extent necessary to allow You to exercise the Licensed 205 | Rights, but not otherwise. 206 | 207 | 2. Patent and trademark rights are not licensed under this 208 | Public License. 209 | 210 | 3. 
To the extent possible, the Licensor waives any right to 211 | collect royalties from You for the exercise of the Licensed 212 | Rights, whether directly or through a collecting society 213 | under any voluntary or waivable statutory or compulsory 214 | licensing scheme. In all other cases the Licensor expressly 215 | reserves any right to collect such royalties, including when 216 | the Licensed Material is used other than for NonCommercial 217 | purposes. 218 | 219 | Section 3 -- License Conditions. 220 | 221 | Your exercise of the Licensed Rights is expressly made subject to the 222 | following conditions. 223 | 224 | a. Attribution. 225 | 226 | 1. If You Share the Licensed Material (including in modified 227 | form), You must: 228 | 229 | a. retain the following if it is supplied by the Licensor 230 | with the Licensed Material: 231 | 232 | i. identification of the creator(s) of the Licensed 233 | Material and any others designated to receive 234 | attribution, in any reasonable manner requested by 235 | the Licensor (including by pseudonym if 236 | designated); 237 | 238 | ii. a copyright notice; 239 | 240 | iii. a notice that refers to this Public License; 241 | 242 | iv. a notice that refers to the disclaimer of 243 | warranties; 244 | 245 | v. a URI or hyperlink to the Licensed Material to the 246 | extent reasonably practicable; 247 | 248 | b. indicate if You modified the Licensed Material and 249 | retain an indication of any previous modifications; and 250 | 251 | c. indicate the Licensed Material is licensed under this 252 | Public License, and include the text of, or the URI or 253 | hyperlink to, this Public License. 254 | 255 | 2. You may satisfy the conditions in Section 3(a)(1) in any 256 | reasonable manner based on the medium, means, and context in 257 | which You Share the Licensed Material. For example, it may be 258 | reasonable to satisfy the conditions by providing a URI or 259 | hyperlink to a resource that includes the required 260 | information. 261 | 262 | 3. If requested by the Licensor, You must remove any of the 263 | information required by Section 3(a)(1)(A) to the extent 264 | reasonably practicable. 265 | 266 | 4. If You Share Adapted Material You produce, the Adapter's 267 | License You apply must not prevent recipients of the Adapted 268 | Material from complying with this Public License. 269 | 270 | Section 4 -- Sui Generis Database Rights. 271 | 272 | Where the Licensed Rights include Sui Generis Database Rights that 273 | apply to Your use of the Licensed Material: 274 | 275 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 276 | to extract, reuse, reproduce, and Share all or a substantial 277 | portion of the contents of the database for NonCommercial purposes 278 | only; 279 | 280 | b. if You include all or a substantial portion of the database 281 | contents in a database in which You have Sui Generis Database 282 | Rights, then the database in which You have Sui Generis Database 283 | Rights (but not its individual contents) is Adapted Material; and 284 | 285 | c. You must comply with the conditions in Section 3(a) if You Share 286 | all or a substantial portion of the contents of the database. 287 | 288 | For the avoidance of doubt, this Section 4 supplements and does not 289 | replace Your obligations under this Public License where the Licensed 290 | Rights include other Copyright and Similar Rights. 291 | 292 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 293 | 294 | a. 
UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 295 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 296 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 297 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 298 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 299 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 300 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 301 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 302 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 303 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 304 | 305 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 306 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 307 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 308 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 309 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 310 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 311 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 312 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 313 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 314 | 315 | c. The disclaimer of warranties and limitation of liability provided 316 | above shall be interpreted in a manner that, to the extent 317 | possible, most closely approximates an absolute disclaimer and 318 | waiver of all liability. 319 | 320 | Section 6 -- Term and Termination. 321 | 322 | a. This Public License applies for the term of the Copyright and 323 | Similar Rights licensed here. However, if You fail to comply with 324 | this Public License, then Your rights under this Public License 325 | terminate automatically. 326 | 327 | b. Where Your right to use the Licensed Material has terminated under 328 | Section 6(a), it reinstates: 329 | 330 | 1. automatically as of the date the violation is cured, provided 331 | it is cured within 30 days of Your discovery of the 332 | violation; or 333 | 334 | 2. upon express reinstatement by the Licensor. 335 | 336 | For the avoidance of doubt, this Section 6(b) does not affect any 337 | right the Licensor may have to seek remedies for Your violations 338 | of this Public License. 339 | 340 | c. For the avoidance of doubt, the Licensor may also offer the 341 | Licensed Material under separate terms or conditions or stop 342 | distributing the Licensed Material at any time; however, doing so 343 | will not terminate this Public License. 344 | 345 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 346 | License. 347 | 348 | Section 7 -- Other Terms and Conditions. 349 | 350 | a. The Licensor shall not be bound by any additional or different 351 | terms or conditions communicated by You unless expressly agreed. 352 | 353 | b. Any arrangements, understandings, or agreements regarding the 354 | Licensed Material not stated herein are separate from and 355 | independent of the terms and conditions of this Public License. 356 | 357 | Section 8 -- Interpretation. 358 | 359 | a. For the avoidance of doubt, this Public License does not, and 360 | shall not be interpreted to, reduce, limit, restrict, or impose 361 | conditions on any use of the Licensed Material that could lawfully 362 | be made without permission under this Public License. 363 | 364 | b. 
To the extent possible, if any provision of this Public License is
365 | deemed unenforceable, it shall be automatically reformed to the
366 | minimum extent necessary to make it enforceable. If the provision
367 | cannot be reformed, it shall be severed from this Public License
368 | without affecting the enforceability of the remaining terms and
369 | conditions.
370 |
371 | c. No term or condition of this Public License will be waived and no
372 | failure to comply consented to unless expressly agreed to by the
373 | Licensor.
374 |
375 | d. Nothing in this Public License constitutes or may be interpreted
376 | as a limitation upon, or waiver of, any privileges and immunities
377 | that apply to the Licensor or You, including from the legal
378 | processes of any jurisdiction or authority.
379 |
380 | =======================================================================
381 |
382 | Creative Commons is not a party to its public
383 | licenses. Notwithstanding, Creative Commons may elect to apply one of
384 | its public licenses to material it publishes and in those instances
385 | will be considered the “Licensor.” The text of the Creative Commons
386 | public licenses is dedicated to the public domain under the CC0 Public
387 | Domain Dedication. Except for the limited purpose of indicating that
388 | material is shared under a Creative Commons public license or as
389 | otherwise permitted by the Creative Commons policies published at
390 | creativecommons.org/policies, Creative Commons does not authorize the
391 | use of the trademark "Creative Commons" or any other trademark or logo
392 | of Creative Commons without its prior written consent including,
393 | without limitation, in connection with any unauthorized modifications
394 | to any of its public licenses or any other arrangements,
395 | understandings, or agreements concerning use of licensed material. For
396 | the avoidance of doubt, this paragraph does not form part of the
397 | public licenses.
398 |
399 | Creative Commons may be contacted at creativecommons.org.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 2s-AGCN
2 | Two-Stream Adaptive Graph Convolutional Networks for Skeleton-Based Action Recognition, CVPR 2019
3 |
4 | # Note
5 |
6 | ~~PyTorch version should be 0.3! For PyTorch0.4 or higher, the codes need to be modified.~~ \
7 | The code has now been updated to PyTorch >= 0.4. \
8 | A new model named AAGCN has been added, which achieves better performance.
9 |
10 | # Data Preparation
11 |
12 | - Download the raw data from [NTU-RGB+D](https://github.com/shahroudy/NTURGB-D) and [Skeleton-Kinetics](https://github.com/yysijie/st-gcn). Then put them under the data directory:
13 |
14 | -data\
15 | -kinetics_raw\
16 | -kinetics_train\
17 | ...
18 | -kinetics_val\
19 | ...
20 | -kinetics_train_label.json
21 | -kinetics_val_label.json
22 | -nturgbd_raw\
23 | -nturgb+d_skeletons\
24 | ...
25 | -samples_with_missing_skeletons.txt
26 |
27 |
28 |
29 |
30 |
31 | - Preprocess the data with:
32 |
33 | `python data_gen/ntu_gendata.py`
34 |
35 | `python data_gen/kinetics_gendata.py`
36 |
37 | - Generate the bone data with:
38 |
39 | `python data_gen/gen_bone_data.py`
40 |
41 | # Training & Testing
42 |
43 | Edit the config file for the benchmark, stream, and hardware you want to use, then run the commands below.
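For example, to train the joint-stream AGCN on a single GPU with a smaller batch, you might change the device and optimizer keys of `config/nturgbd-cross-view/train_joint.yaml` before launching. A minimal sketch (the key names below are exactly those used in the provided configs; the values are illustrative, not the shipped defaults):

    # excerpt of config/nturgbd-cross-view/train_joint.yaml, edited for one GPU
    device: [0]          # the shipped configs train on four GPUs: [0, 1, 2, 3]
    batch_size: 16       # shrink to fit a single GPU's memory
    test_batch_size: 16
    base_lr: 0.025       # scaling the LR with the batch size is a common heuristic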
44 |
45 |
46 | `python main.py --config ./config/nturgbd-cross-view/train_joint.yaml`
47 |
48 | `python main.py --config ./config/nturgbd-cross-view/train_bone.yaml`
49 | To ensemble the results of the joint and bone streams, first run the tests to generate the scores of the softmax layer.
50 |
51 | `python main.py --config ./config/nturgbd-cross-view/test_joint.yaml`
52 |
53 | `python main.py --config ./config/nturgbd-cross-view/test_bone.yaml`
54 |
55 | Then combine the generated scores with:
56 |
57 | `python ensemble.py --datasets ntu/xview`
58 |
59 | # Citation
60 | Please cite the following papers if you use this repository in your research.
61 |
62 | @inproceedings{2sagcn2019cvpr,
63 | title = {Two-Stream Adaptive Graph Convolutional Networks for Skeleton-Based Action Recognition},
64 | author = {Lei Shi and Yifan Zhang and Jian Cheng and Hanqing Lu},
65 | booktitle = {CVPR},
66 | year = {2019},
67 | }
68 |
69 | @article{shi_skeleton-based_2019,
70 | title = {Skeleton-{Based} {Action} {Recognition} with {Multi}-{Stream} {Adaptive} {Graph} {Convolutional} {Networks}},
71 | journal = {arXiv:1912.06971 [cs]},
72 | author = {Shi, Lei and Zhang, Yifan and Cheng, Jian and Lu, Hanqing},
73 | month = dec,
74 | year = {2019},
75 | }
76 | # Contact
77 | For any questions, feel free to contact: `lei.shi@nlpr.ia.ac.cn`
--------------------------------------------------------------------------------
/config/kinetics-skeleton/test_bone.yaml:
--------------------------------------------------------------------------------
1 | # feeder
2 | feeder: feeders.feeder.Feeder
3 | test_feeder_args:
4 | data_path: ./data/kinetics/val_data_bone.npy
5 | label_path: ./data/kinetics/val_label.pkl
6 | debug: False
7 |
8 | # model
9 | model: model.agcn.Model
10 | model_args:
11 | num_class: 400
12 | num_person: 2
13 | num_point: 18
14 | graph: graph.kinetics.Graph
15 | graph_args:
16 | labeling_mode: 'spatial'
17 |
18 |
19 | # test
20 | phase: test
21 | device: 0
22 | test_batch_size: 64
23 | weights: ./pre_train/ki_agcn_bone.pt
24 |
25 |
26 | work_dir: ./work_dir/kinetics/agcn_test_bone
27 | model_saved_name: ./runs/ki_agcn_test_bone
28 | save_score: True
--------------------------------------------------------------------------------
/config/kinetics-skeleton/test_joint.yaml:
--------------------------------------------------------------------------------
1 | # feeder
2 | feeder: feeders.feeder.Feeder
3 | test_feeder_args:
4 | data_path: ./data/kinetics/val_data_joint.npy
5 | label_path: ./data/kinetics/val_label.pkl
6 | debug: False
7 |
8 | # model
9 | model: model.agcn.Model
10 | model_args:
11 | num_class: 400
12 | num_person: 2
13 | num_point: 18
14 | graph: graph.kinetics.Graph
15 | graph_args:
16 | labeling_mode: 'spatial'
17 |
18 |
19 | # test
20 | phase: test
21 | device: 0
22 | test_batch_size: 64
23 | weights: ./pre_train/ki_agcn.pt
24 |
25 |
26 | work_dir: ./work_dir/kinetics/agcn_joint_test
27 | model_saved_name: ./runs/ki_agcn_joint_test
28 | save_score: True
--------------------------------------------------------------------------------
/config/kinetics-skeleton/train_bone.yaml:
--------------------------------------------------------------------------------
1 | work_dir: ./work_dir/kinetics/agcn_bone
2 | model_saved_name: ./runs/ki_agcn_bone
3 |
4 | # feeder
5 | feeder: feeders.feeder.Feeder
6 | train_feeder_args:
7 | random_choose: True
8 | random_move: True
9 | window_size: 150
10 | data_path: ./data/kinetics/train_data_bone.npy
11 | label_path: ./data/kinetics/train_label.pkl
12 | debug: False
13 |
14 |
test_feeder_args: 15 | data_path: ./data/kinetics/val_data_bone.npy 16 | label_path: ./data/kinetics/val_label.pkl 17 | 18 | # model 19 | model: model.agcn.Model 20 | model_args: 21 | num_class: 400 22 | num_person: 2 23 | num_point: 18 24 | graph: graph.kinetics.Graph 25 | graph_args: 26 | labeling_mode: 'spatial' 27 | 28 | #optim 29 | weight_decay: 0.0001 30 | base_lr: 0.1 31 | step: [45, 55] 32 | 33 | # training 34 | device: [0, 1, 2, 3] 35 | batch_size: 128 36 | test_batch_size: 256 37 | num_epoch: 65 38 | nesterov: True 39 | 40 | 41 | -------------------------------------------------------------------------------- /config/kinetics-skeleton/train_joint.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/kinetics/agcn_joint 2 | model_saved_name: ./runs/ki_agcn_joint 3 | 4 | # feeder 5 | feeder: feeders.feeder.Feeder 6 | train_feeder_args: 7 | random_choose: True 8 | random_move: True 9 | window_size: 150 10 | data_path: ./data/kinetics/train_data_joint.npy 11 | label_path: ./data/kinetics/train_label.pkl 12 | debug: False 13 | 14 | test_feeder_args: 15 | data_path: ./data/kinetics/val_data_joint.npy 16 | label_path: ./data/kinetics/val_label.pkl 17 | 18 | # model 19 | model: model.agcn.Model 20 | model_args: 21 | num_class: 400 22 | num_person: 2 23 | num_point: 18 24 | graph: graph.kinetics.Graph 25 | graph_args: 26 | labeling_mode: 'spatial' 27 | 28 | #optim 29 | weight_decay: 0.0001 30 | base_lr: 0.1 31 | step: [45, 55] 32 | 33 | # training 34 | device: [0, 1, 2, 3] 35 | batch_size: 128 36 | test_batch_size: 256 37 | num_epoch: 65 38 | nesterov: True 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /config/nturgbd-cross-subject/test_bone.yaml: -------------------------------------------------------------------------------- 1 | # feeder 2 | feeder: feeders.feeder.Feeder 3 | test_feeder_args: 4 | data_path: ./data/ntu/xsub/val_data_bone.npy 5 | label_path: ./data/ntu/xsub/val_label.pkl 6 | debug: False 7 | 8 | # model 9 | model: model.agcn.Model 10 | model_args: 11 | num_class: 60 12 | num_point: 25 13 | num_person: 2 14 | graph: graph.ntu_rgb_d.Graph 15 | graph_args: 16 | labeling_mode: 'spatial' 17 | 18 | # test 19 | phase: test 20 | device: [4,5,6,7] 21 | test_batch_size: 256 22 | weights: ./runs/ntu_cs_agcn_bone-49-31300.pt 23 | 24 | work_dir: ./work_dir/ntu/xsub/agcn_test_bone 25 | model_saved_name: ./runs/ntu_cs_agcn_test_bone 26 | save_score: True -------------------------------------------------------------------------------- /config/nturgbd-cross-subject/test_joint.yaml: -------------------------------------------------------------------------------- 1 | # feeder 2 | feeder: feeders.feeder.Feeder 3 | test_feeder_args: 4 | data_path: ./data/ntu/xsub/val_data_joint.npy 5 | label_path: ./data/ntu/xsub/val_label.pkl 6 | debug: False 7 | 8 | # model 9 | model: model.agcn.Model 10 | model_args: 11 | num_class: 60 12 | num_point: 25 13 | num_person: 2 14 | graph: graph.ntu_rgb_d.Graph 15 | graph_args: 16 | labeling_mode: 'spatial' 17 | 18 | # test 19 | phase: test 20 | device: [0,1,2,3] 21 | test_batch_size: 256 22 | weights: ./runs/ntu_cs_agcn_joint-49-31300.pt 23 | 24 | work_dir: ./work_dir/ntu/xsub/agcn_test_joint 25 | model_saved_name: ./runs/ntu_cs_agcn_test_joint 26 | save_score: True -------------------------------------------------------------------------------- /config/nturgbd-cross-subject/train_bone.yaml: 
-------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/ntu/xsub/agcn_bone 2 | model_saved_name: ./runs/ntu_cs_agcn_bone 3 | # feeder 4 | feeder: feeders.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/ntu/xsub/train_data_bone.npy 7 | label_path: ./data/ntu/xsub/train_label.pkl 8 | debug: False 9 | random_choose: False 10 | random_shift: False 11 | random_move: False 12 | window_size: -1 13 | normalization: False 14 | 15 | test_feeder_args: 16 | data_path: ./data/ntu/xsub/val_data_bone.npy 17 | label_path: ./data/ntu/xsub/val_label.pkl 18 | 19 | # model 20 | model: model.agcn.Model 21 | model_args: 22 | num_class: 60 23 | num_point: 25 24 | num_person: 2 25 | graph: graph.ntu_rgb_d.Graph 26 | graph_args: 27 | labeling_mode: 'spatial' 28 | 29 | #optim 30 | weight_decay: 0.0001 31 | base_lr: 0.1 32 | step: [30, 40] 33 | 34 | # training 35 | device: [4, 5, 6, 7] 36 | batch_size: 64 37 | test_batch_size: 64 38 | num_epoch: 50 39 | nesterov: True 40 | -------------------------------------------------------------------------------- /config/nturgbd-cross-subject/train_joint.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/ntu/xsub/agcn_joint 2 | model_saved_name: ./runs/ntu_cs_agcn_joint 3 | # feeder 4 | feeder: feeders.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/ntu/xsub/train_data_joint.npy 7 | label_path: ./data/ntu/xsub/train_label.pkl 8 | debug: False 9 | random_choose: False 10 | random_shift: False 11 | random_move: False 12 | window_size: -1 13 | normalization: False 14 | 15 | test_feeder_args: 16 | data_path: ./data/ntu/xsub/val_data_joint.npy 17 | label_path: ./data/ntu/xsub/val_label.pkl 18 | 19 | # model 20 | model: model.agcn.Model 21 | model_args: 22 | num_class: 60 23 | num_point: 25 24 | num_person: 2 25 | graph: graph.ntu_rgb_d.Graph 26 | graph_args: 27 | labeling_mode: 'spatial' 28 | 29 | #optim 30 | weight_decay: 0.0001 31 | base_lr: 0.1 32 | step: [30, 40] 33 | 34 | # training 35 | device: [0, 1 ,2, 3] 36 | batch_size: 64 37 | test_batch_size: 64 38 | num_epoch: 50 39 | nesterov: True 40 | 41 | -------------------------------------------------------------------------------- /config/nturgbd-cross-subject/train_joint_aagcn.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/ntu/xsub/aagcn_joint 2 | model_saved_name: ./runs/ntu_cs_aagcn_joint 3 | # feeder 4 | feeder: feeders.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/ntu/xsub/train_data_joint.npy 7 | label_path: ./data/ntu/xsub/train_label.pkl 8 | debug: False 9 | random_choose: False 10 | random_shift: False 11 | random_move: False 12 | window_size: -1 13 | normalization: False 14 | 15 | test_feeder_args: 16 | data_path: ./data/ntu/xsub/val_data_joint.npy 17 | label_path: ./data/ntu/xsub/val_label.pkl 18 | 19 | # model 20 | model: model.aagcn.Model 21 | model_args: 22 | num_class: 60 23 | num_point: 25 24 | num_person: 2 25 | graph: graph.ntu_rgb_d.Graph 26 | graph_args: 27 | labeling_mode: 'spatial' 28 | 29 | #optim 30 | weight_decay: 0.0001 31 | base_lr: 0.1 32 | step: [30, 40] 33 | warm_up_epoch: 5 34 | 35 | # training 36 | device: [0, 1, 2, 3] 37 | batch_size: 64 38 | test_batch_size: 64 39 | num_epoch: 50 40 | nesterov: True 41 | 42 | only_train_part: True 43 | only_train_epoch: 5 -------------------------------------------------------------------------------- /config/nturgbd-cross-view/test_bone.yaml: 
-------------------------------------------------------------------------------- 1 | # feeder 2 | feeder: feeders.feeder.Feeder 3 | test_feeder_args: 4 | data_path: ./data/ntu/xview/val_data_bone.npy 5 | label_path: ./data/ntu/xview/val_label.pkl 6 | debug: False 7 | 8 | # model 9 | model: model.agcn.Model 10 | model_args: 11 | num_class: 60 12 | num_point: 25 13 | num_person: 2 14 | graph: graph.ntu_rgb_d.Graph 15 | graph_args: 16 | labeling_mode: 'spatial' 17 | 18 | 19 | # test 20 | phase: test 21 | device: [0,1,2,3] 22 | test_batch_size: 256 23 | weights: ./runs/ntu_cv_agcn_bone-49-29400.pt 24 | 25 | work_dir: ./work_dir/ntu/xview/agcn_test_bone 26 | model_saved_name: ./runs/ntu_cv_agcn_test_bone 27 | save_score: True -------------------------------------------------------------------------------- /config/nturgbd-cross-view/test_joint.yaml: -------------------------------------------------------------------------------- 1 | # feeder 2 | feeder: feeders.feeder.Feeder 3 | test_feeder_args: 4 | data_path: ./data/ntu/xview/val_data_joint.npy 5 | label_path: ./data/ntu/xview/val_label.pkl 6 | debug: False 7 | 8 | # model 9 | model: model.agcn.Model 10 | model_args: 11 | num_class: 60 12 | num_point: 25 13 | num_person: 2 14 | graph: graph.ntu_rgb_d.Graph 15 | graph_args: 16 | labeling_mode: 'spatial' 17 | 18 | 19 | # test 20 | phase: test 21 | device: [0,1,2,3] 22 | test_batch_size: 256 23 | weights: ./runs/ntu_cv_agcn_joint-49-29400.pt 24 | 25 | work_dir: ./work_dir/ntu/xview/agcn_test_joint 26 | model_saved_name: ./runs/ntu_cv_agcn_test_joint 27 | save_score: True -------------------------------------------------------------------------------- /config/nturgbd-cross-view/train_bone.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/ntu/xview/agcn_bone 2 | model_saved_name: ./runs/ntu_cv_agcn_bone 3 | # feeder 4 | feeder: feeders.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/ntu/xview/train_data_bone.npy 7 | label_path: ./data/ntu/xview/train_label.pkl 8 | debug: False 9 | random_choose: False 10 | random_shift: False 11 | random_move: False 12 | window_size: -1 13 | normalization: False 14 | 15 | test_feeder_args: 16 | data_path: ./data/ntu/xview/val_data_bone.npy 17 | label_path: ./data/ntu/xview/val_label.pkl 18 | 19 | # model 20 | model: model.agcn.Model 21 | model_args: 22 | num_class: 60 23 | num_point: 25 24 | num_person: 2 25 | graph: graph.ntu_rgb_d.Graph 26 | graph_args: 27 | labeling_mode: 'spatial' 28 | 29 | #optim 30 | weight_decay: 0.0001 31 | base_lr: 0.1 32 | step: [30, 40] 33 | 34 | # training 35 | device: [4, 5 ,6, 7] 36 | batch_size: 64 37 | test_batch_size: 64 38 | num_epoch: 50 39 | nesterov: True 40 | 41 | -------------------------------------------------------------------------------- /config/nturgbd-cross-view/train_joint.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/ntu/xview/agcn_joint 2 | model_saved_name: ./runs/ntu_cv_agcn_joint 3 | # feeder 4 | feeder: feeders.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/ntu/xview/train_data_joint.npy 7 | label_path: ./data/ntu/xview/train_label.pkl 8 | debug: False 9 | random_choose: False 10 | random_shift: False 11 | random_move: False 12 | window_size: -1 13 | normalization: False 14 | 15 | test_feeder_args: 16 | data_path: ./data/ntu/xview/val_data_joint.npy 17 | label_path: ./data/ntu/xview/val_label.pkl 18 | 19 | # model 20 | model: model.agcn.Model 21 | model_args: 22 | 
num_class: 60 23 | num_point: 25 24 | num_person: 2 25 | graph: graph.ntu_rgb_d.Graph 26 | graph_args: 27 | labeling_mode: 'spatial' 28 | 29 | #optim 30 | weight_decay: 0.0001 31 | base_lr: 0.1 32 | step: [30, 40] 33 | 34 | # training 35 | device: [0, 1 ,2, 3] 36 | batch_size: 64 37 | test_batch_size: 64 38 | num_epoch: 50 39 | nesterov: True 40 | -------------------------------------------------------------------------------- /data/nturgbd_raw/samples_with_missing_skeletons.txt: -------------------------------------------------------------------------------- 1 | S001C002P005R002A008 2 | S001C002P006R001A008 3 | S001C003P002R001A055 4 | S001C003P002R002A012 5 | S001C003P005R002A004 6 | S001C003P005R002A005 7 | S001C003P005R002A006 8 | S001C003P006R002A008 9 | S002C002P011R002A030 10 | S002C003P008R001A020 11 | S002C003P010R002A010 12 | S002C003P011R002A007 13 | S002C003P011R002A011 14 | S002C003P014R002A007 15 | S003C001P019R001A055 16 | S003C002P002R002A055 17 | S003C002P018R002A055 18 | S003C003P002R001A055 19 | S003C003P016R001A055 20 | S003C003P018R002A024 21 | S004C002P003R001A013 22 | S004C002P008R001A009 23 | S004C002P020R001A003 24 | S004C002P020R001A004 25 | S004C002P020R001A012 26 | S004C002P020R001A020 27 | S004C002P020R001A021 28 | S004C002P020R001A036 29 | S005C002P004R001A001 30 | S005C002P004R001A003 31 | S005C002P010R001A016 32 | S005C002P010R001A017 33 | S005C002P010R001A048 34 | S005C002P010R001A049 35 | S005C002P016R001A009 36 | S005C002P016R001A010 37 | S005C002P018R001A003 38 | S005C002P018R001A028 39 | S005C002P018R001A029 40 | S005C003P016R002A009 41 | S005C003P018R002A013 42 | S005C003P021R002A057 43 | S006C001P001R002A055 44 | S006C002P007R001A005 45 | S006C002P007R001A006 46 | S006C002P016R001A043 47 | S006C002P016R001A051 48 | S006C002P016R001A052 49 | S006C002P022R001A012 50 | S006C002P023R001A020 51 | S006C002P023R001A021 52 | S006C002P023R001A022 53 | S006C002P023R001A023 54 | S006C002P024R001A018 55 | S006C002P024R001A019 56 | S006C003P001R002A013 57 | S006C003P007R002A009 58 | S006C003P007R002A010 59 | S006C003P007R002A025 60 | S006C003P016R001A060 61 | S006C003P017R001A055 62 | S006C003P017R002A013 63 | S006C003P017R002A014 64 | S006C003P017R002A015 65 | S006C003P022R002A013 66 | S007C001P018R002A050 67 | S007C001P025R002A051 68 | S007C001P028R001A050 69 | S007C001P028R001A051 70 | S007C001P028R001A052 71 | S007C002P008R002A008 72 | S007C002P015R002A055 73 | S007C002P026R001A008 74 | S007C002P026R001A009 75 | S007C002P026R001A010 76 | S007C002P026R001A011 77 | S007C002P026R001A012 78 | S007C002P026R001A050 79 | S007C002P027R001A011 80 | S007C002P027R001A013 81 | S007C002P028R002A055 82 | S007C003P007R001A002 83 | S007C003P007R001A004 84 | S007C003P019R001A060 85 | S007C003P027R002A001 86 | S007C003P027R002A002 87 | S007C003P027R002A003 88 | S007C003P027R002A004 89 | S007C003P027R002A005 90 | S007C003P027R002A006 91 | S007C003P027R002A007 92 | S007C003P027R002A008 93 | S007C003P027R002A009 94 | S007C003P027R002A010 95 | S007C003P027R002A011 96 | S007C003P027R002A012 97 | S007C003P027R002A013 98 | S008C002P001R001A009 99 | S008C002P001R001A010 100 | S008C002P001R001A014 101 | S008C002P001R001A015 102 | S008C002P001R001A016 103 | S008C002P001R001A018 104 | S008C002P001R001A019 105 | S008C002P008R002A059 106 | S008C002P025R001A060 107 | S008C002P029R001A004 108 | S008C002P031R001A005 109 | S008C002P031R001A006 110 | S008C002P032R001A018 111 | S008C002P034R001A018 112 | S008C002P034R001A019 113 | S008C002P035R001A059 114 | S008C002P035R002A002 115 | 
S008C002P035R002A005 116 | S008C003P007R001A009 117 | S008C003P007R001A016 118 | S008C003P007R001A017 119 | S008C003P007R001A018 120 | S008C003P007R001A019 121 | S008C003P007R001A020 122 | S008C003P007R001A021 123 | S008C003P007R001A022 124 | S008C003P007R001A023 125 | S008C003P007R001A025 126 | S008C003P007R001A026 127 | S008C003P007R001A028 128 | S008C003P007R001A029 129 | S008C003P007R002A003 130 | S008C003P008R002A050 131 | S008C003P025R002A002 132 | S008C003P025R002A011 133 | S008C003P025R002A012 134 | S008C003P025R002A016 135 | S008C003P025R002A020 136 | S008C003P025R002A022 137 | S008C003P025R002A023 138 | S008C003P025R002A030 139 | S008C003P025R002A031 140 | S008C003P025R002A032 141 | S008C003P025R002A033 142 | S008C003P025R002A049 143 | S008C003P025R002A060 144 | S008C003P031R001A001 145 | S008C003P031R002A004 146 | S008C003P031R002A014 147 | S008C003P031R002A015 148 | S008C003P031R002A016 149 | S008C003P031R002A017 150 | S008C003P032R002A013 151 | S008C003P033R002A001 152 | S008C003P033R002A011 153 | S008C003P033R002A012 154 | S008C003P034R002A001 155 | S008C003P034R002A012 156 | S008C003P034R002A022 157 | S008C003P034R002A023 158 | S008C003P034R002A024 159 | S008C003P034R002A044 160 | S008C003P034R002A045 161 | S008C003P035R002A016 162 | S008C003P035R002A017 163 | S008C003P035R002A018 164 | S008C003P035R002A019 165 | S008C003P035R002A020 166 | S008C003P035R002A021 167 | S009C002P007R001A001 168 | S009C002P007R001A003 169 | S009C002P007R001A014 170 | S009C002P008R001A014 171 | S009C002P015R002A050 172 | S009C002P016R001A002 173 | S009C002P017R001A028 174 | S009C002P017R001A029 175 | S009C003P017R002A030 176 | S009C003P025R002A054 177 | S010C001P007R002A020 178 | S010C002P016R002A055 179 | S010C002P017R001A005 180 | S010C002P017R001A018 181 | S010C002P017R001A019 182 | S010C002P019R001A001 183 | S010C002P025R001A012 184 | S010C003P007R002A043 185 | S010C003P008R002A003 186 | S010C003P016R001A055 187 | S010C003P017R002A055 188 | S011C001P002R001A008 189 | S011C001P018R002A050 190 | S011C002P008R002A059 191 | S011C002P016R002A055 192 | S011C002P017R001A020 193 | S011C002P017R001A021 194 | S011C002P018R002A055 195 | S011C002P027R001A009 196 | S011C002P027R001A010 197 | S011C002P027R001A037 198 | S011C003P001R001A055 199 | S011C003P002R001A055 200 | S011C003P008R002A012 201 | S011C003P015R001A055 202 | S011C003P016R001A055 203 | S011C003P019R001A055 204 | S011C003P025R001A055 205 | S011C003P028R002A055 206 | S012C001P019R001A060 207 | S012C001P019R002A060 208 | S012C002P015R001A055 209 | S012C002P017R002A012 210 | S012C002P025R001A060 211 | S012C003P008R001A057 212 | S012C003P015R001A055 213 | S012C003P015R002A055 214 | S012C003P016R001A055 215 | S012C003P017R002A055 216 | S012C003P018R001A055 217 | S012C003P018R001A057 218 | S012C003P019R002A011 219 | S012C003P019R002A012 220 | S012C003P025R001A055 221 | S012C003P027R001A055 222 | S012C003P027R002A009 223 | S012C003P028R001A035 224 | S012C003P028R002A055 225 | S013C001P015R001A054 226 | S013C001P017R002A054 227 | S013C001P018R001A016 228 | S013C001P028R001A040 229 | S013C002P015R001A054 230 | S013C002P017R002A054 231 | S013C002P028R001A040 232 | S013C003P008R002A059 233 | S013C003P015R001A054 234 | S013C003P017R002A054 235 | S013C003P025R002A022 236 | S013C003P027R001A055 237 | S013C003P028R001A040 238 | S014C001P027R002A040 239 | S014C002P015R001A003 240 | S014C002P019R001A029 241 | S014C002P025R002A059 242 | S014C002P027R002A040 243 | S014C002P039R001A050 244 | S014C003P007R002A059 245 | S014C003P015R002A055 246 | 
S014C003P019R002A055 247 | S014C003P025R001A048 248 | S014C003P027R002A040 249 | S015C001P008R002A040 250 | S015C001P016R001A055 251 | S015C001P017R001A055 252 | S015C001P017R002A055 253 | S015C002P007R001A059 254 | S015C002P008R001A003 255 | S015C002P008R001A004 256 | S015C002P008R002A040 257 | S015C002P015R001A002 258 | S015C002P016R001A001 259 | S015C002P016R002A055 260 | S015C003P008R002A007 261 | S015C003P008R002A011 262 | S015C003P008R002A012 263 | S015C003P008R002A028 264 | S015C003P008R002A040 265 | S015C003P025R002A012 266 | S015C003P025R002A017 267 | S015C003P025R002A020 268 | S015C003P025R002A021 269 | S015C003P025R002A030 270 | S015C003P025R002A033 271 | S015C003P025R002A034 272 | S015C003P025R002A036 273 | S015C003P025R002A037 274 | S015C003P025R002A044 275 | S016C001P019R002A040 276 | S016C001P025R001A011 277 | S016C001P025R001A012 278 | S016C001P025R001A060 279 | S016C001P040R001A055 280 | S016C001P040R002A055 281 | S016C002P008R001A011 282 | S016C002P019R002A040 283 | S016C002P025R002A012 284 | S016C003P008R001A011 285 | S016C003P008R002A002 286 | S016C003P008R002A003 287 | S016C003P008R002A004 288 | S016C003P008R002A006 289 | S016C003P008R002A009 290 | S016C003P019R002A040 291 | S016C003P039R002A016 292 | S017C001P016R002A031 293 | S017C002P007R001A013 294 | S017C002P008R001A009 295 | S017C002P015R001A042 296 | S017C002P016R002A031 297 | S017C002P016R002A055 298 | S017C003P007R002A013 299 | S017C003P008R001A059 300 | S017C003P016R002A031 301 | S017C003P017R001A055 302 | S017C003P020R001A059 -------------------------------------------------------------------------------- /data_gen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshiwjx/2s-AGCN/953c14fc10883cd869646328f5d522e9e9282063/data_gen/__init__.py -------------------------------------------------------------------------------- /data_gen/gen_bone_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from numpy.lib.format import open_memmap 4 | 5 | paris = { 6 | 'ntu/xview': ( 7 | (1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6), (8, 7), (9, 21), (10, 9), (11, 10), (12, 11), 8 | (13, 1), 9 | (14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18), (20, 19), (22, 23), (21, 21), (23, 8), (24, 25), 10 | (25, 12) 11 | ), 12 | 'ntu/xsub': ( 13 | (1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6), (8, 7), (9, 21), (10, 9), (11, 10), (12, 11), 14 | (13, 1), 15 | (14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18), (20, 19), (22, 23), (21, 21), (23, 8), (24, 25), 16 | (25, 12) 17 | ), 18 | 19 | 'kinetics': ((0, 0), (1, 0), (2, 1), (3, 2), (4, 3), (5, 1), (6, 5), (7, 6), (8, 2), (9, 8), (10, 9), 20 | (11, 5), (12, 11), (13, 12), (14, 0), (15, 0), (16, 14), (17, 15)) 21 | } 22 | 23 | sets = { 24 | 'train', 'val' 25 | } 26 | 27 | # 'ntu/xview', 'ntu/xsub', 'kinetics' 28 | datasets = { 29 | 'ntu/xview', 'ntu/xsub', 30 | } 31 | # bone 32 | from tqdm import tqdm 33 | 34 | for dataset in datasets: 35 | for set in sets: 36 | print(dataset, set) 37 | data = np.load('../data/{}/{}_data_joint.npy'.format(dataset, set)) 38 | N, C, T, V, M = data.shape 39 | fp_sp = open_memmap( 40 | '../data/{}/{}_data_bone.npy'.format(dataset, set), 41 | dtype='float32', 42 | mode='w+', 43 | shape=(N, 3, T, V, M)) 44 | 45 | fp_sp[:, :C, :, :, :] = data 46 | for v1, v2 in tqdm(paris[dataset]): 47 | if dataset != 'kinetics': 48 | v1 -= 1 49 | v2 -= 1 50 | fp_sp[:, :, :, v1, :] = 
data[:, :, :, v1, :] - data[:, :, :, v2, :]
51 |
--------------------------------------------------------------------------------
/data_gen/gen_motion_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from numpy.lib.format import open_memmap
4 |
5 | sets = {
6 | 'train', 'val'
7 | }
8 | # 'ntu/xview', 'ntu/xsub', 'kinetics'
9 | datasets = {
10 | 'ntu/xview', 'ntu/xsub'
11 | }
12 |
13 | parts = {
14 | 'joint', 'bone'
15 | }
16 | from tqdm import tqdm
17 |
18 | for dataset in datasets:
19 | for set in sets:
20 | for part in parts:
21 | print(dataset, set, part)
22 | data = np.load('../data/{}/{}_data_{}.npy'.format(dataset, set, part))
23 | N, C, T, V, M = data.shape
24 | fp_sp = open_memmap(
25 | '../data/{}/{}_data_{}_motion.npy'.format(dataset, set, part),
26 | dtype='float32',
27 | mode='w+',
28 | shape=(N, 3, T, V, M))
29 | for t in tqdm(range(T - 1)):
30 | fp_sp[:, :, t, :, :] = data[:, :, t + 1, :, :] - data[:, :, t, :, :]
31 | fp_sp[:, :, T - 1, :, :] = 0
32 |
--------------------------------------------------------------------------------
/data_gen/kinetics_gendata.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import numpy as np
4 | import json
5 | from torch.utils.data import Dataset
6 | import pickle
7 | from tqdm import tqdm
8 |
9 | num_joint = 18
10 | max_frame = 300
11 | num_person_out = 2
12 | num_person_in = 5
13 |
14 |
15 | class Feeder_kinetics(Dataset):
16 | """ Feeder for skeleton-based action recognition in the kinetics-skeleton dataset
17 | # Joint index:
18 | # {0, "Nose"}
19 | # {1, "Neck"},
20 | # {2, "RShoulder"},
21 | # {3, "RElbow"},
22 | # {4, "RWrist"},
23 | # {5, "LShoulder"},
24 | # {6, "LElbow"},
25 | # {7, "LWrist"},
26 | # {8, "RHip"},
27 | # {9, "RKnee"},
28 | # {10, "RAnkle"},
29 | # {11, "LHip"},
30 | # {12, "LKnee"},
31 | # {13, "LAnkle"},
32 | # {14, "REye"},
33 | # {15, "LEye"},
34 | # {16, "REar"},
35 | # {17, "LEar"},
36 | Arguments:
37 | data_path: the path to the folder of raw '.json' skeleton files
38 | label_path: the path to the '.json' label file
39 | window_size: The length of the output sequence
40 | num_person_in: The number of people the feeder can observe in the input sequence
41 | num_person_out: The number of people the feeder keeps in the output sequence
42 | debug: If true, only use the first 100 samples
43 | """
44 |
45 | def __init__(self,
46 | data_path,
47 | label_path,
48 | ignore_empty_sample=True,
49 | window_size=-1,
50 | num_person_in=5,
51 | num_person_out=2):
52 | self.data_path = data_path
53 | self.label_path = label_path
54 | self.window_size = window_size
55 | self.num_person_in = num_person_in
56 | self.num_person_out = num_person_out
57 | self.ignore_empty_sample = ignore_empty_sample
58 |
59 | self.load_data()
60 |
61 | def load_data(self):
62 | # load file list
63 | self.sample_name = os.listdir(self.data_path)
64 |
65 | # load label
66 | label_path = self.label_path
67 | with open(label_path) as f:
68 | label_info = json.load(f)
69 |
70 | sample_id = [name.split('.')[0] for name in self.sample_name]
71 | self.label = np.array([label_info[id]['label_index'] for id in sample_id])
72 | has_skeleton = np.array([label_info[id]['has_skeleton'] for id in sample_id])
73 |
74 | # ignore samples that do not have a skeleton sequence
75 | if self.ignore_empty_sample:
76 | self.sample_name = [s for h, s in zip(has_skeleton, self.sample_name) if h]
77 | self.label =
self.label[has_skeleton] 78 | 79 | # output data shape (N, C, T, V, M) 80 | self.N = len(self.sample_name) # sample 81 | self.C = 3 # channel 82 | self.T = max_frame # frame 83 | self.V = num_joint # joint 84 | self.M = self.num_person_out # person 85 | 86 | def __len__(self): 87 | return len(self.sample_name) 88 | 89 | def __iter__(self): 90 | return self 91 | 92 | def __getitem__(self, index): 93 | 94 | # output shape (C, T, V, M) 95 | # get data 96 | sample_name = self.sample_name[index] 97 | sample_path = os.path.join(self.data_path, sample_name) 98 | with open(sample_path, 'r') as f: 99 | video_info = json.load(f) 100 | 101 | # fill data_numpy 102 | data_numpy = np.zeros((self.C, self.T, self.V, self.num_person_in)) 103 | for frame_info in video_info['data']: 104 | frame_index = frame_info['frame_index'] 105 | for m, skeleton_info in enumerate(frame_info["skeleton"]): 106 | if m >= self.num_person_in: 107 | break 108 | pose = skeleton_info['pose'] 109 | score = skeleton_info['score'] 110 | data_numpy[0, frame_index, :, m] = pose[0::2] 111 | data_numpy[1, frame_index, :, m] = pose[1::2] 112 | data_numpy[2, frame_index, :, m] = score 113 | 114 | # centralization 115 | data_numpy[0:2] = data_numpy[0:2] - 0.5 116 | data_numpy[1:2] = -data_numpy[1:2] 117 | data_numpy[0][data_numpy[2] == 0] = 0 118 | data_numpy[1][data_numpy[2] == 0] = 0 119 | 120 | # get & check label index 121 | label = video_info['label_index'] 122 | assert (self.label[index] == label) 123 | 124 | # sort by score 125 | sort_index = (-data_numpy[2, :, :, :].sum(axis=1)).argsort(axis=1) 126 | for t, s in enumerate(sort_index): 127 | data_numpy[:, t, :, :] = data_numpy[:, t, :, s].transpose((1, 2, 128 | 0)) 129 | data_numpy = data_numpy[:, :, :, 0:self.num_person_out] 130 | 131 | return data_numpy, label 132 | 133 | 134 | def gendata(data_path, label_path, 135 | data_out_path, label_out_path, 136 | num_person_in=num_person_in, # observe the first 5 persons 137 | num_person_out=num_person_out, # then choose 2 persons with the highest score 138 | max_frame=max_frame): 139 | feeder = Feeder_kinetics( 140 | data_path=data_path, 141 | label_path=label_path, 142 | num_person_in=num_person_in, 143 | num_person_out=num_person_out, 144 | window_size=max_frame) 145 | 146 | sample_name = feeder.sample_name 147 | sample_label = [] 148 | 149 | fp = np.zeros((len(sample_name), 3, max_frame, num_joint, num_person_out), dtype=np.float32) 150 | 151 | for i, s in enumerate(tqdm(sample_name)): 152 | data, label = feeder[i] 153 | fp[i, :, 0:data.shape[1], :, :] = data 154 | sample_label.append(label) 155 | 156 | with open(label_out_path, 'wb') as f: 157 | pickle.dump((sample_name, list(sample_label)), f) 158 | 159 | np.save(data_out_path, fp) 160 | 161 | 162 | if __name__ == '__main__': 163 | parser = argparse.ArgumentParser( 164 | description='Kinetics-skeleton Data Converter.') 165 | parser.add_argument( 166 | '--data_path', default='../data/kinetics_raw') 167 | parser.add_argument( 168 | '--out_folder', default='../data/kinetics') 169 | arg = parser.parse_args() 170 | 171 | part = ['val', 'train'] 172 | for p in part: 173 | print('kinetics ', p) 174 | if not os.path.exists(arg.out_folder): 175 | os.makedirs(arg.out_folder) 176 | data_path = '{}/kinetics_{}'.format(arg.data_path, p) 177 | label_path = '{}/kinetics_{}_label.json'.format(arg.data_path, p) 178 | data_out_path = '{}/{}_data_joint.npy'.format(arg.out_folder, p) 179 | label_out_path = '{}/{}_label.pkl'.format(arg.out_folder, p) 180 | 181 | gendata(data_path, label_path, 
data_out_path, label_out_path)
182 |
--------------------------------------------------------------------------------
/data_gen/merge_joint_bone_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | sets = {
5 | 'train', 'val'
6 | }
7 |
8 | # 'ntu/xview', 'ntu/xsub', 'kinetics'
9 | datasets = {
10 | 'ntu/xview', 'ntu/xsub'
11 | }
12 |
13 | for dataset in datasets:
14 | for set in sets:
15 | print(dataset, set)
16 | data_jpt = np.load('../data/{}/{}_data_joint.npy'.format(dataset, set))
17 | data_bone = np.load('../data/{}/{}_data_bone.npy'.format(dataset, set))
18 | N, C, T, V, M = data_jpt.shape
19 | data_jpt_bone = np.concatenate((data_jpt, data_bone), axis=1)
20 | np.save('../data/{}/{}_data_joint_bone.npy'.format(dataset, set), data_jpt_bone)
21 |
--------------------------------------------------------------------------------
/data_gen/ntu_gendata.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import pickle
3 | from tqdm import tqdm
4 | import sys
5 |
6 | sys.path.extend(['../'])
7 | from data_gen.preprocess import pre_normalization
8 |
9 | training_subjects = [
10 | 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38
11 | ]
12 | training_cameras = [2, 3]
13 | max_body_true = 2
14 | max_body_kinect = 4
15 | num_joint = 25
16 | max_frame = 300
17 |
18 | import numpy as np
19 | import os
20 |
21 |
22 | def read_skeleton_filter(file):
23 | with open(file, 'r') as f:
24 | skeleton_sequence = {}
25 | skeleton_sequence['numFrame'] = int(f.readline())
26 | skeleton_sequence['frameInfo'] = []
27 | # num_body = 0
28 | for t in range(skeleton_sequence['numFrame']):
29 | frame_info = {}
30 | frame_info['numBody'] = int(f.readline())
31 | frame_info['bodyInfo'] = []
32 |
33 | for m in range(frame_info['numBody']):
34 | body_info = {}
35 | body_info_key = [
36 | 'bodyID', 'clipedEdges', 'handLeftConfidence',
37 | 'handLeftState', 'handRightConfidence', 'handRightState',
38 | 'isResticted', 'leanX', 'leanY', 'trackingState'
39 | ]
40 | body_info = {
41 | k: float(v)
42 | for k, v in zip(body_info_key, f.readline().split())
43 | }
44 | body_info['numJoint'] = int(f.readline())
45 | body_info['jointInfo'] = []
46 | for v in range(body_info['numJoint']):
47 | joint_info_key = [
48 | 'x', 'y', 'z', 'depthX', 'depthY', 'colorX', 'colorY',
49 | 'orientationW', 'orientationX', 'orientationY',
50 | 'orientationZ', 'trackingState'
51 | ]
52 | joint_info = {
53 | k: float(v)
54 | for k, v in zip(joint_info_key, f.readline().split())
55 | }
56 | body_info['jointInfo'].append(joint_info)
57 | frame_info['bodyInfo'].append(body_info)
58 | skeleton_sequence['frameInfo'].append(frame_info)
59 |
60 | return skeleton_sequence
61 |
62 |
63 | def get_nonzero_std(s): # tvc
64 | index = s.sum(-1).sum(-1) != 0 # select valid frames
65 | s = s[index]
66 | if len(s) != 0:
67 | s = s[:, :, 0].std() + s[:, :, 1].std() + s[:, :, 2].std() # three channels
68 | else:
69 | s = 0
70 | return s
71 |
72 |
73 | def read_xyz(file, max_body=4, num_joint=25): # keep only the two most active bodies
74 | seq_info = read_skeleton_filter(file)
75 | data = np.zeros((max_body, seq_info['numFrame'], num_joint, 3))
76 | for n, f in enumerate(seq_info['frameInfo']):
77 | for m, b in enumerate(f['bodyInfo']):
78 | for j, v in enumerate(b['jointInfo']):
79 | if m < max_body and j < num_joint:
80 | data[m, n, j, :] = [v['x'], v['y'], v['z']]
81 | else:
82 | pass
83 |
84 | # select the two bodies with the maximum motion energy
85 | energy
= np.array([get_nonzero_std(x) for x in data])
86 |     index = energy.argsort()[::-1][0:max_body_true]
87 |     data = data[index]
88 | 
89 |     data = data.transpose(3, 1, 2, 0)
90 |     return data
91 | 
92 | 
93 | def gendata(data_path, out_path, ignored_sample_path=None, benchmark='xview', part='eval'):
94 |     if ignored_sample_path is not None:
95 |         with open(ignored_sample_path, 'r') as f:
96 |             ignored_samples = [
97 |                 line.strip() + '.skeleton' for line in f.readlines()
98 |             ]
99 |     else:
100 |         ignored_samples = []
101 |     sample_name = []
102 |     sample_label = []
103 |     for filename in os.listdir(data_path):
104 |         if filename in ignored_samples:
105 |             continue
106 |         action_class = int(
107 |             filename[filename.find('A') + 1:filename.find('A') + 4])
108 |         subject_id = int(
109 |             filename[filename.find('P') + 1:filename.find('P') + 4])
110 |         camera_id = int(
111 |             filename[filename.find('C') + 1:filename.find('C') + 4])
112 | 
113 |         if benchmark == 'xview':
114 |             istraining = (camera_id in training_cameras)
115 |         elif benchmark == 'xsub':
116 |             istraining = (subject_id in training_subjects)
117 |         else:
118 |             raise ValueError()
119 | 
120 |         if part == 'train':
121 |             issample = istraining
122 |         elif part == 'val':
123 |             issample = not (istraining)
124 |         else:
125 |             raise ValueError()
126 | 
127 |         if issample:
128 |             sample_name.append(filename)
129 |             sample_label.append(action_class - 1)
130 | 
131 |     with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f:
132 |         pickle.dump((sample_name, list(sample_label)), f)
133 | 
134 |     fp = np.zeros((len(sample_label), 3, max_frame, num_joint, max_body_true), dtype=np.float32)
135 | 
136 |     for i, s in enumerate(tqdm(sample_name)):
137 |         data = read_xyz(os.path.join(data_path, s), max_body=max_body_kinect, num_joint=num_joint)
138 |         fp[i, :, 0:data.shape[1], :, :] = data
139 | 
140 |     fp = pre_normalization(fp)
141 |     np.save('{}/{}_data_joint.npy'.format(out_path, part), fp)
142 | 
143 | 
144 | if __name__ == '__main__':
145 |     parser = argparse.ArgumentParser(description='NTU-RGB-D Data Converter.')
146 |     parser.add_argument('--data_path', default='../data/nturgbd_raw/nturgb+d_skeletons/')
147 |     parser.add_argument('--ignored_sample_path',
148 |                         default='../data/nturgbd_raw/samples_with_missing_skeletons.txt')
149 |     parser.add_argument('--out_folder', default='../data/ntu/')
150 | 
151 |     benchmark = ['xsub', 'xview']
152 |     part = ['train', 'val']
153 |     arg = parser.parse_args()
154 | 
155 |     for b in benchmark:
156 |         for p in part:
157 |             out_path = os.path.join(arg.out_folder, b)
158 |             if not os.path.exists(out_path):
159 |                 os.makedirs(out_path)
160 |             print(b, p)
161 |             gendata(
162 |                 arg.data_path,
163 |                 out_path,
164 |                 arg.ignored_sample_path,
165 |                 benchmark=b,
166 |                 part=p)
167 | -------------------------------------------------------------------------------- /data_gen/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys
2 | 
3 | sys.path.extend(['../'])
4 | from data_gen.rotation import *
5 | from tqdm import tqdm
6 | 
7 | 
8 | def pre_normalization(data, zaxis=[0, 1], xaxis=[8, 4]):
9 |     N, C, T, V, M = data.shape
10 |     s = np.transpose(data, [0, 4, 2, 3, 1])  # N, C, T, V, M to N, M, T, V, C
11 | 
12 |     print('pad empty frames with the previous frames')
13 |     for i_s, skeleton in enumerate(tqdm(s)):  # pad
14 |         if skeleton.sum() == 0:
15 |             print(i_s, ' has no skeleton')
16 |         for i_p, person in enumerate(skeleton):
17 |             if person.sum() == 0:
18 |                 continue
19 |             if person[0].sum() == 0:
20 |                 index = (person.sum(-1).sum(-1) != 0)
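                # move all non-empty frames to the front; the empty tail is re-padded below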
21 |                 tmp = person[index].copy()
22 |                 person *= 0
23 |                 person[:len(tmp)] = tmp
24 |             for i_f, frame in enumerate(person):
25 |                 if frame.sum() == 0:
26 |                     if person[i_f:].sum() == 0:
27 |                         rest = len(person) - i_f
28 |                         num = int(np.ceil(rest / i_f))
29 |                         pad = np.concatenate([person[0:i_f] for _ in range(num)], 0)[:rest]
30 |                         s[i_s, i_p, i_f:] = pad
31 |                         break
32 | 
33 |     print('subtract the center joint #1 (spine joint in NTU and neck joint in Kinetics)')
34 |     for i_s, skeleton in enumerate(tqdm(s)):
35 |         if skeleton.sum() == 0:
36 |             continue
37 |         main_body_center = skeleton[0][:, 1:2, :].copy()
38 |         for i_p, person in enumerate(skeleton):
39 |             if person.sum() == 0:
40 |                 continue
41 |             mask = (person.sum(-1) != 0).reshape(T, V, 1)
42 |             s[i_s, i_p] = (s[i_s, i_p] - main_body_center) * mask
43 | 
44 |     print('align the bone between hip (joint 0) and spine (joint 1) of the first person with the z axis')
45 |     for i_s, skeleton in enumerate(tqdm(s)):
46 |         if skeleton.sum() == 0:
47 |             continue
48 |         joint_bottom = skeleton[0, 0, zaxis[0]]
49 |         joint_top = skeleton[0, 0, zaxis[1]]
50 |         axis = np.cross(joint_top - joint_bottom, [0, 0, 1])
51 |         angle = angle_between(joint_top - joint_bottom, [0, 0, 1])
52 |         matrix_z = rotation_matrix(axis, angle)
53 |         for i_p, person in enumerate(skeleton):
54 |             if person.sum() == 0:
55 |                 continue
56 |             for i_f, frame in enumerate(person):
57 |                 if frame.sum() == 0:
58 |                     continue
59 |                 for i_j, joint in enumerate(frame):
60 |                     s[i_s, i_p, i_f, i_j] = np.dot(matrix_z, joint)
61 | 
62 |     print(
63 |         'align the bone between right shoulder (joint 8) and left shoulder (joint 4) of the first person with the x axis')
64 |     for i_s, skeleton in enumerate(tqdm(s)):
65 |         if skeleton.sum() == 0:
66 |             continue
67 |         joint_rshoulder = skeleton[0, 0, xaxis[0]]
68 |         joint_lshoulder = skeleton[0, 0, xaxis[1]]
69 |         axis = np.cross(joint_rshoulder - joint_lshoulder, [1, 0, 0])
70 |         angle = angle_between(joint_rshoulder - joint_lshoulder, [1, 0, 0])
71 |         matrix_x = rotation_matrix(axis, angle)
72 |         for i_p, person in enumerate(skeleton):
73 |             if person.sum() == 0:
74 |                 continue
75 |             for i_f, frame in enumerate(person):
76 |                 if frame.sum() == 0:
77 |                     continue
78 |                 for i_j, joint in enumerate(frame):
79 |                     s[i_s, i_p, i_f, i_j] = np.dot(matrix_x, joint)
80 | 
81 |     data = np.transpose(s, [0, 4, 2, 3, 1])
82 |     return data
83 | 
84 | 
85 | if __name__ == '__main__':
86 |     data = np.load('../data/ntu/xview/val_data.npy')
87 |     data = pre_normalization(data)
88 |     np.save('../data/ntu/xview/data_val_pre.npy', data)
89 | -------------------------------------------------------------------------------- /data_gen/rotation.py: -------------------------------------------------------------------------------- 1 | import numpy as np
2 | import math
3 | 
4 | 
5 | def rotation_matrix(axis, theta):
6 |     """
7 |     Return the rotation matrix associated with counterclockwise rotation about
8 |     the given axis by theta radians.
9 |     """
10 |     if np.abs(axis).sum() < 1e-6 or np.abs(theta) < 1e-6:
11 |         return np.eye(3)
12 |     axis = np.asarray(axis)
13 |     axis = axis / math.sqrt(np.dot(axis, axis))
14 |     a = math.cos(theta / 2.0)
15 |     b, c, d = -axis * math.sin(theta / 2.0)
16 |     aa, bb, cc, dd = a * a, b * b, c * c, d * d
17 |     bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
18 |     return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
19 |                      [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
20 |                      [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]])
21 | 
22 | 
23 | def unit_vector(vector):
24 |     """ Returns the unit vector of the vector.
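        e.g. unit_vector((0, 0, 3)) -> [0., 0., 1.]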
""" 25 | return vector / np.linalg.norm(vector) 26 | 27 | 28 | def angle_between(v1, v2): 29 | """ Returns the angle in radians between vectors 'v1' and 'v2':: 30 | 31 | >>> angle_between((1, 0, 0), (0, 1, 0)) 32 | 1.5707963267948966 33 | >>> angle_between((1, 0, 0), (1, 0, 0)) 34 | 0.0 35 | >>> angle_between((1, 0, 0), (-1, 0, 0)) 36 | 3.141592653589793 37 | """ 38 | if np.abs(v1).sum() < 1e-6 or np.abs(v2).sum() < 1e-6: 39 | return 0 40 | v1_u = unit_vector(v1) 41 | v2_u = unit_vector(v2) 42 | return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)) 43 | 44 | 45 | def x_rotation(vector, theta): 46 | """Rotates 3-D vector around x-axis""" 47 | R = np.array([[1, 0, 0], [0, np.cos(theta), -np.sin(theta)], [0, np.sin(theta), np.cos(theta)]]) 48 | return np.dot(R, vector) 49 | 50 | 51 | def y_rotation(vector, theta): 52 | """Rotates 3-D vector around y-axis""" 53 | R = np.array([[np.cos(theta), 0, np.sin(theta)], [0, 1, 0], [-np.sin(theta), 0, np.cos(theta)]]) 54 | return np.dot(R, vector) 55 | 56 | 57 | def z_rotation(vector, theta): 58 | """Rotates 3-D vector around z-axis""" 59 | R = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) 60 | return np.dot(R, vector) 61 | -------------------------------------------------------------------------------- /ensemble.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pickle 3 | 4 | import numpy as np 5 | from tqdm import tqdm 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--datasets', default='ntu/xsub', choices={'kinetics', 'ntu/xsub', 'ntu/xview'}, 9 | help='the work folder for storing results') 10 | parser.add_argument('--alpha', default=1, help='weighted summation') 11 | arg = parser.parse_args() 12 | 13 | dataset = arg.datasets 14 | label = open('./data/' + dataset + '/val_label.pkl', 'rb') 15 | label = np.array(pickle.load(label)) 16 | r1 = open('./work_dir/' + dataset + '/agcn_test_joint/epoch1_test_score.pkl', 'rb') 17 | r1 = list(pickle.load(r1).items()) 18 | r2 = open('./work_dir/' + dataset + '/agcn_test_bone/epoch1_test_score.pkl', 'rb') 19 | r2 = list(pickle.load(r2).items()) 20 | right_num = total_num = right_num_5 = 0 21 | for i in tqdm(range(len(label[0]))): 22 | _, l = label[:, i] 23 | _, r11 = r1[i] 24 | _, r22 = r2[i] 25 | r = r11 + r22 * arg.alpha 26 | rank_5 = r.argsort()[-5:] 27 | right_num_5 += int(int(l) in rank_5) 28 | r = np.argmax(r) 29 | right_num += int(r == int(l)) 30 | total_num += 1 31 | acc = right_num / total_num 32 | acc5 = right_num_5 / total_num 33 | print(acc, acc5) 34 | -------------------------------------------------------------------------------- /feeders/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tools 2 | from . 
import feeder
3 | -------------------------------------------------------------------------------- /feeders/feeder.py: -------------------------------------------------------------------------------- 1 | import numpy as np
2 | import pickle
3 | import torch
4 | from torch.utils.data import Dataset
5 | import sys
6 | 
7 | sys.path.extend(['../'])
8 | from feeders import tools
9 | 
10 | 
11 | class Feeder(Dataset):
12 |     def __init__(self, data_path, label_path,
13 |                  random_choose=False, random_shift=False, random_move=False,
14 |                  window_size=-1, normalization=False, debug=False, use_mmap=True):
15 |         """
16 | 
17 |         :param data_path: path to the .npy data file (N, C, T, V, M)
18 |         :param label_path: path to the .pkl label file
19 |         :param random_choose: If true, randomly choose a portion of the input sequence
20 |         :param random_shift: If true, randomly pad zeros at the beginning or end of sequence
21 |         :param random_move: If true, randomly perturb the sequence with small rotations, scalings and translations
22 |         :param window_size: The length of the output sequence
23 |         :param normalization: If true, normalize input sequence
24 |         :param debug: If true, only use the first 100 samples
25 |         :param use_mmap: If true, use mmap mode to load data, which can save the running memory
26 |         """
27 | 
28 |         self.debug = debug
29 |         self.data_path = data_path
30 |         self.label_path = label_path
31 |         self.random_choose = random_choose
32 |         self.random_shift = random_shift
33 |         self.random_move = random_move
34 |         self.window_size = window_size
35 |         self.normalization = normalization
36 |         self.use_mmap = use_mmap
37 |         self.load_data()
38 |         if normalization:
39 |             self.get_mean_map()
40 | 
41 |     def load_data(self):
42 |         # data: N C V T M
43 | 
44 |         try:
45 |             with open(self.label_path) as f:
46 |                 self.sample_name, self.label = pickle.load(f)
47 |         except:
48 |             # for pickle file from python2
49 |             with open(self.label_path, 'rb') as f:
50 |                 self.sample_name, self.label = pickle.load(f, encoding='latin1')
51 | 
52 |         # load data
53 |         if self.use_mmap:
54 |             self.data = np.load(self.data_path, mmap_mode='r')
55 |         else:
56 |             self.data = np.load(self.data_path)
57 |         if self.debug:
58 |             self.label = self.label[0:100]
59 |             self.data = self.data[0:100]
60 |             self.sample_name = self.sample_name[0:100]
61 | 
62 |     def get_mean_map(self):
63 |         data = self.data
64 |         N, C, T, V, M = data.shape
65 |         self.mean_map = data.mean(axis=2, keepdims=True).mean(axis=4, keepdims=True).mean(axis=0)
66 |         self.std_map = data.transpose((0, 2, 4, 1, 3)).reshape((N * T * M, C * V)).std(axis=0).reshape((C, 1, V, 1))
67 | 
68 |     def __len__(self):
69 |         return len(self.label)
70 | 
71 |     def __iter__(self):
72 |         return self
73 | 
74 |     def __getitem__(self, index):
75 |         data_numpy = self.data[index]
76 |         label = self.label[index]
77 |         data_numpy = np.array(data_numpy)
78 | 
79 |         if self.normalization:
80 |             data_numpy = (data_numpy - self.mean_map) / self.std_map
81 |         if self.random_shift:
82 |             data_numpy = tools.random_shift(data_numpy)
83 |         if self.random_choose:
84 |             data_numpy = tools.random_choose(data_numpy, self.window_size)
85 |         elif self.window_size > 0:
86 |             data_numpy = tools.auto_pading(data_numpy, self.window_size)
87 |         if self.random_move:
88 |             data_numpy = tools.random_move(data_numpy)
89 | 
90 |         return data_numpy, label, index
91 | 
92 |     def top_k(self, score, top_k):
93 |         rank = score.argsort()
94 |         hit_top_k = [l in rank[i, -top_k:] for i, l in enumerate(self.label)]
95 |         return sum(hit_top_k) * 1.0 / len(hit_top_k)
96 | 
97 | 
98 | def import_class(name):
99 |     components = name.split('.')
100 |     mod = __import__(components[0])
101 |     for comp in components[1:]:
102 |         mod = getattr(mod, comp)
103 |     return mod
104 | 
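# A minimal usage sketch (the paths are assumptions; point them at the files
# produced by the data_gen scripts):
#   feeder = Feeder('../data/ntu/xview/val_data_joint.npy',
#                   '../data/ntu/xview/val_label.pkl')
#   data, label, index = feeder[0]  # data: (C, T, V, M) numpy array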
105 | 
106 | def test(data_path, label_path, vid=None, graph=None, is_3d=False):
107 |     '''
108 |     visualize the samples using matplotlib
109 |     :param data_path: path to the .npy data file
110 |     :param label_path: path to the .pkl label file
111 |     :param vid: the id of the sample
112 |     :param graph: the import path of the graph class, e.g. 'graph.ntu_rgb_d.Graph'
113 |     :param is_3d: set it True when visualizing 3D (NTU) skeletons
114 |     :return:
115 |     '''
116 |     import matplotlib.pyplot as plt
117 |     loader = torch.utils.data.DataLoader(
118 |         dataset=Feeder(data_path, label_path),
119 |         batch_size=64,
120 |         shuffle=False,
121 |         num_workers=2)
122 | 
123 |     if vid is not None:
124 |         sample_name = loader.dataset.sample_name
125 |         sample_id = [name.split('.')[0] for name in sample_name]
126 |         index = sample_id.index(vid)
127 |         data, label, index = loader.dataset[index]
128 |         data = data.reshape((1,) + data.shape)
129 | 
130 |         # for batch_idx, (data, label) in enumerate(loader):
131 |         N, C, T, V, M = data.shape
132 | 
133 |         plt.ion()
134 |         fig = plt.figure()
135 |         if is_3d:
136 |             from mpl_toolkits.mplot3d import Axes3D
137 |             ax = fig.add_subplot(111, projection='3d')
138 |         else:
139 |             ax = fig.add_subplot(111)
140 | 
141 |         if graph is None:
142 |             p_type = ['b.', 'g.', 'r.', 'c.', 'm.', 'y.', 'k.', 'k.', 'k.', 'k.']
143 |             pose = [
144 |                 ax.plot(np.zeros(V), np.zeros(V), p_type[m])[0] for m in range(M)
145 |             ]
146 |             ax.axis([-1, 1, -1, 1])
147 |             for t in range(T):
148 |                 for m in range(M):
149 |                     pose[m].set_xdata(data[0, 0, t, :, m])
150 |                     pose[m].set_ydata(data[0, 1, t, :, m])
151 |                 fig.canvas.draw()
152 |                 plt.pause(0.001)
153 |         else:
154 |             p_type = ['b-', 'g-', 'r-', 'c-', 'm-', 'y-', 'k-', 'k-', 'k-', 'k-']
155 |             import sys
156 |             from os import path
157 |             sys.path.append(
158 |                 path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))
159 |             G = import_class(graph)()
160 |             edge = G.inward
161 |             pose = []
162 |             for m in range(M):
163 |                 a = []
164 |                 for i in range(len(edge)):
165 |                     if is_3d:
166 |                         a.append(ax.plot(np.zeros(3), np.zeros(3), p_type[m])[0])
167 |                     else:
168 |                         a.append(ax.plot(np.zeros(2), np.zeros(2), p_type[m])[0])
169 |                 pose.append(a)
170 |             ax.axis([-1, 1, -1, 1])
171 |             if is_3d:
172 |                 ax.set_zlim3d(-1, 1)
173 |             for t in range(T):
174 |                 for m in range(M):
175 |                     for i, (v1, v2) in enumerate(edge):
176 |                         x1 = data[0, :2, t, v1, m]
177 |                         x2 = data[0, :2, t, v2, m]
178 |                         if (x1.sum() != 0 and x2.sum() != 0) or v1 == 1 or v2 == 1:
179 |                             pose[m][i].set_xdata(data[0, 0, t, [v1, v2], m])
180 |                             pose[m][i].set_ydata(data[0, 1, t, [v1, v2], m])
181 |                             if is_3d:
182 |                                 pose[m][i].set_3d_properties(data[0, 2, t, [v1, v2], m])
183 |                 fig.canvas.draw()
184 |                 # plt.savefig('/home/lshi/Desktop/skeleton_sequence/' + str(t) + '.jpg')
185 |                 plt.pause(0.01)
186 | 
187 | 
188 | if __name__ == '__main__':
189 |     import os
190 | 
191 |     os.environ['DISPLAY'] = 'localhost:10.0'
192 |     data_path = "../data/ntu/xview/val_data_joint.npy"
193 |     label_path = "../data/ntu/xview/val_label.pkl"
194 |     graph = 'graph.ntu_rgb_d.Graph'
195 |     test(data_path, label_path, vid='S004C001P003R001A032', graph=graph, is_3d=True)
196 |     # data_path = "../data/kinetics/val_data.npy"
197 |     # label_path = "../data/kinetics/val_label.pkl"
198 |     # graph = 'graph.Kinetics'
199 |     # test(data_path, label_path, vid='UOD7oll3Kqo', graph=graph)
200 | -------------------------------------------------------------------------------- /feeders/tools.py: -------------------------------------------------------------------------------- 1 | import random
2 | 
3 | import numpy as np
4 | 
5 | 
6 | def downsample(data_numpy, step, random_sample=True):
7 |     # input: C,T,V,M
8 |     begin = np.random.randint(step) if
random_sample else 0
9 |     return data_numpy[:, begin::step, :, :]
10 | 
11 | 
12 | def temporal_slice(data_numpy, step):
13 |     # input: C,T,V,M
14 |     C, T, V, M = data_numpy.shape
15 |     return data_numpy.reshape(C, T // step, step, V, M).transpose(
16 |         (0, 1, 3, 2, 4)).reshape(C, T // step, V, step * M)
17 | 
18 | 
19 | def mean_subtractor(data_numpy, mean):
20 |     # input: C,T,V,M
21 |     # naive version
22 |     if mean == 0:
23 |         return data_numpy
24 |     C, T, V, M = data_numpy.shape
25 |     valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
26 |     begin = valid_frame.argmax()
27 |     end = len(valid_frame) - valid_frame[::-1].argmax()
28 |     data_numpy[:, :end, :, :] = data_numpy[:, :end, :, :] - mean
29 |     return data_numpy
30 | 
31 | 
32 | def auto_pading(data_numpy, size, random_pad=False):
33 |     C, T, V, M = data_numpy.shape
34 |     if T < size:
35 |         begin = random.randint(0, size - T) if random_pad else 0
36 |         data_numpy_paded = np.zeros((C, size, V, M))
37 |         data_numpy_paded[:, begin:begin + T, :, :] = data_numpy
38 |         return data_numpy_paded
39 |     else:
40 |         return data_numpy
41 | 
42 | 
43 | def random_choose(data_numpy, size, auto_pad=True):
44 |     # input: C,T,V,M; randomly crop a clip -- not ideal, since zero-padded frames may be picked
45 |     C, T, V, M = data_numpy.shape
46 |     if T == size:
47 |         return data_numpy
48 |     elif T < size:
49 |         if auto_pad:
50 |             return auto_pading(data_numpy, size, random_pad=True)
51 |         else:
52 |             return data_numpy
53 |     else:
54 |         begin = random.randint(0, T - size)
55 |         return data_numpy[:, begin:begin + size, :, :]
56 | 
57 | 
58 | def random_move(data_numpy,
59 |                 angle_candidate=[-10., -5., 0., 5., 10.],
60 |                 scale_candidate=[0.9, 1.0, 1.1],
61 |                 transform_candidate=[-0.2, -0.1, 0.0, 0.1, 0.2],
62 |                 move_time_candidate=[1]):
63 |     # input: C,T,V,M
64 |     C, T, V, M = data_numpy.shape
65 |     move_time = random.choice(move_time_candidate)
66 |     node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)
67 |     node = np.append(node, T)
68 |     num_node = len(node)
69 | 
70 |     A = np.random.choice(angle_candidate, num_node)
71 |     S = np.random.choice(scale_candidate, num_node)
72 |     T_x = np.random.choice(transform_candidate, num_node)
73 |     T_y = np.random.choice(transform_candidate, num_node)
74 | 
75 |     a = np.zeros(T)
76 |     s = np.zeros(T)
77 |     t_x = np.zeros(T)
78 |     t_y = np.zeros(T)
79 | 
80 |     # linspace
81 |     for i in range(num_node - 1):
82 |         a[node[i]:node[i + 1]] = np.linspace(
83 |             A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180
84 |         s[node[i]:node[i + 1]] = np.linspace(S[i], S[i + 1],
85 |                                              node[i + 1] - node[i])
86 |         t_x[node[i]:node[i + 1]] = np.linspace(T_x[i], T_x[i + 1],
87 |                                                node[i + 1] - node[i])
88 |         t_y[node[i]:node[i + 1]] = np.linspace(T_y[i], T_y[i + 1],
89 |                                                node[i + 1] - node[i])
90 | 
91 |     theta = np.array([[np.cos(a) * s, -np.sin(a) * s],
92 |                       [np.sin(a) * s, np.cos(a) * s]])  # rotation matrices
93 | 
94 |     # perform transformation
95 |     for i_frame in range(T):
96 |         xy = data_numpy[0:2, i_frame, :, :]
97 |         new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1))
98 |         new_xy[0] += t_x[i_frame]
99 |         new_xy[1] += t_y[i_frame]  # translation
100 |         data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M)
101 | 
102 |     return data_numpy
103 | 
104 | 
105 | def random_shift(data_numpy):
106 |     # input: C,T,V,M; shift the valid segment to a random temporal offset
107 |     C, T, V, M = data_numpy.shape
108 |     data_shift = np.zeros(data_numpy.shape)
109 |     valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
110 |     begin = valid_frame.argmax()
111 |     end = len(valid_frame) - valid_frame[::-1].argmax()
112 | 
113 |     size = end - begin
114 |     bias = random.randint(0, T - size)
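    # paste the valid [begin, end) window into the all-zero buffer at the random offset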
115 | data_shift[:, bias:bias + size, :, :] = data_numpy[:, begin:end, :, :] 116 | 117 | return data_shift 118 | 119 | 120 | def openpose_match(data_numpy): 121 | C, T, V, M = data_numpy.shape 122 | assert (C == 3) 123 | score = data_numpy[2, :, :, :].sum(axis=1) 124 | # the rank of body confidence in each frame (shape: T-1, M) 125 | rank = (-score[0:T - 1]).argsort(axis=1).reshape(T - 1, M) 126 | 127 | # data of frame 1 128 | xy1 = data_numpy[0:2, 0:T - 1, :, :].reshape(2, T - 1, V, M, 1) 129 | # data of frame 2 130 | xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T - 1, V, 1, M) 131 | # square of distance between frame 1&2 (shape: T-1, M, M) 132 | distance = ((xy2 - xy1) ** 2).sum(axis=2).sum(axis=0) 133 | 134 | # match pose 135 | forward_map = np.zeros((T, M), dtype=int) - 1 136 | forward_map[0] = range(M) 137 | for m in range(M): 138 | choose = (rank == m) 139 | forward = distance[choose].argmin(axis=1) 140 | for t in range(T - 1): 141 | distance[t, :, forward[t]] = np.inf 142 | forward_map[1:][choose] = forward 143 | assert (np.all(forward_map >= 0)) 144 | 145 | # string data 146 | for t in range(T - 1): 147 | forward_map[t + 1] = forward_map[t + 1][forward_map[t]] 148 | 149 | # generate data 150 | new_data_numpy = np.zeros(data_numpy.shape) 151 | for t in range(T): 152 | new_data_numpy[:, t, :, :] = data_numpy[:, t, :, forward_map[ 153 | t]].transpose(1, 2, 0) 154 | data_numpy = new_data_numpy 155 | 156 | # score sort 157 | trace_score = data_numpy[2, :, :, :].sum(axis=1).sum(axis=0) 158 | rank = (-trace_score).argsort() 159 | data_numpy = data_numpy[:, :, :, rank] 160 | 161 | return data_numpy 162 | -------------------------------------------------------------------------------- /graph/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tools 2 | from . import ntu_rgb_d 3 | from . 
import kinetics 4 | -------------------------------------------------------------------------------- /graph/kinetics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | sys.path.extend(['../']) 5 | from graph import tools 6 | import networkx as nx 7 | 8 | # Joint index: 9 | # {0, "Nose"} 10 | # {1, "Neck"}, 11 | # {2, "RShoulder"}, 12 | # {3, "RElbow"}, 13 | # {4, "RWrist"}, 14 | # {5, "LShoulder"}, 15 | # {6, "LElbow"}, 16 | # {7, "LWrist"}, 17 | # {8, "RHip"}, 18 | # {9, "RKnee"}, 19 | # {10, "RAnkle"}, 20 | # {11, "LHip"}, 21 | # {12, "LKnee"}, 22 | # {13, "LAnkle"}, 23 | # {14, "REye"}, 24 | # {15, "LEye"}, 25 | # {16, "REar"}, 26 | # {17, "LEar"}, 27 | 28 | # Edge format: (origin, neighbor) 29 | num_node = 18 30 | self_link = [(i, i) for i in range(num_node)] 31 | inward = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 11), (10, 9), (9, 8), 32 | (11, 5), (8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0), (17, 15), 33 | (16, 14)] 34 | outward = [(j, i) for (i, j) in inward] 35 | neighbor = inward + outward 36 | 37 | 38 | class Graph: 39 | def __init__(self, labeling_mode='spatial'): 40 | self.A = self.get_adjacency_matrix(labeling_mode) 41 | self.num_node = num_node 42 | self.self_link = self_link 43 | self.inward = inward 44 | self.outward = outward 45 | self.neighbor = neighbor 46 | 47 | def get_adjacency_matrix(self, labeling_mode=None): 48 | if labeling_mode is None: 49 | return self.A 50 | if labeling_mode == 'spatial': 51 | A = tools.get_spatial_graph(num_node, self_link, inward, outward) 52 | else: 53 | raise ValueError() 54 | return A 55 | 56 | 57 | if __name__ == '__main__': 58 | A = Graph('spatial').get_adjacency_matrix() 59 | print('') 60 | -------------------------------------------------------------------------------- /graph/ntu_rgb_d.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.extend(['../']) 4 | from graph import tools 5 | 6 | num_node = 25 7 | self_link = [(i, i) for i in range(num_node)] 8 | inward_ori_index = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6), 9 | (8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1), 10 | (14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18), 11 | (20, 19), (22, 23), (23, 8), (24, 25), (25, 12)] 12 | inward = [(i - 1, j - 1) for (i, j) in inward_ori_index] 13 | outward = [(j, i) for (i, j) in inward] 14 | neighbor = inward + outward 15 | 16 | 17 | class Graph: 18 | def __init__(self, labeling_mode='spatial'): 19 | self.A = self.get_adjacency_matrix(labeling_mode) 20 | self.num_node = num_node 21 | self.self_link = self_link 22 | self.inward = inward 23 | self.outward = outward 24 | self.neighbor = neighbor 25 | 26 | def get_adjacency_matrix(self, labeling_mode=None): 27 | if labeling_mode is None: 28 | return self.A 29 | if labeling_mode == 'spatial': 30 | A = tools.get_spatial_graph(num_node, self_link, inward, outward) 31 | else: 32 | raise ValueError() 33 | return A 34 | 35 | 36 | if __name__ == '__main__': 37 | import matplotlib.pyplot as plt 38 | import os 39 | 40 | # os.environ['DISPLAY'] = 'localhost:11.0' 41 | A = Graph('spatial').get_adjacency_matrix() 42 | for i in A: 43 | plt.imshow(i, cmap='gray') 44 | plt.show() 45 | print(A) 46 | -------------------------------------------------------------------------------- /graph/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def edge2mat(link, 
num_node):
5 |     A = np.zeros((num_node, num_node))
6 |     for i, j in link:
7 |         A[j, i] = 1
8 |     return A
9 | 
10 | 
11 | def normalize_digraph(A):  # normalize each column by its sum
12 |     Dl = np.sum(A, 0)
13 |     h, w = A.shape
14 |     Dn = np.zeros((w, w))
15 |     for i in range(w):
16 |         if Dl[i] > 0:
17 |             Dn[i, i] = Dl[i] ** (-1)
18 |     AD = np.dot(A, Dn)
19 |     return AD
20 | 
21 | 
22 | def get_spatial_graph(num_node, self_link, inward, outward):
23 |     I = edge2mat(self_link, num_node)
24 |     In = normalize_digraph(edge2mat(inward, num_node))
25 |     Out = normalize_digraph(edge2mat(outward, num_node))
26 |     A = np.stack((I, In, Out))
27 |     return A
28 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 | 
4 | import argparse
5 | import inspect
6 | import os
7 | import pickle
8 | import random
9 | import shutil
10 | import time
11 | from collections import OrderedDict
12 | 
13 | import numpy as np
14 | # torch
15 | import torch
16 | import torch.backends.cudnn as cudnn
17 | import torch.nn as nn
18 | import torch.optim as optim
19 | import yaml
20 | from tensorboardX import SummaryWriter
21 | from torch.autograd import Variable
22 | from torch.optim.lr_scheduler import _LRScheduler
23 | from tqdm import tqdm
24 | 
25 | 
26 | class GradualWarmupScheduler(_LRScheduler):
27 |     def __init__(self, optimizer, total_epoch, after_scheduler=None):
28 |         self.total_epoch = total_epoch
29 |         self.after_scheduler = after_scheduler
30 |         self.finished = False
31 |         self.last_epoch = -1
32 |         super().__init__(optimizer)
33 | 
34 |     def get_lr(self):
35 |         return [base_lr * (self.last_epoch + 1) / self.total_epoch for base_lr in self.base_lrs]
36 | 
37 |     def step(self, epoch=None, metric=None):
38 |         if self.last_epoch >= self.total_epoch - 1:
39 |             if metric is None:
40 |                 return self.after_scheduler.step(epoch)
41 |             else:
42 |                 return self.after_scheduler.step(metric, epoch)
43 |         else:
44 |             return super(GradualWarmupScheduler, self).step(epoch)
45 | 
46 | 
47 | def init_seed(_):
48 |     torch.cuda.manual_seed_all(1)
49 |     torch.manual_seed(1)
50 |     np.random.seed(1)
51 |     random.seed(1)
52 |     # torch.backends.cudnn.enabled = False
53 |     torch.backends.cudnn.deterministic = True
54 |     torch.backends.cudnn.benchmark = False
55 | 
56 | 
57 | def get_parser():
58 |     # parameter priority: command line > config > default
59 |     parser = argparse.ArgumentParser(
60 |         description='Spatial Temporal Graph Convolution Network')
61 |     parser.add_argument(
62 |         '--work-dir',
63 |         default='./work_dir/temp',
64 |         help='the work folder for storing results')
65 | 
66 |     parser.add_argument('-model_saved_name', default='')
67 |     parser.add_argument(
68 |         '--config',
69 |         default='./config/nturgbd-cross-view/test_bone.yaml',
70 |         help='path to the configuration file')
71 | 
72 |     # processor
73 |     parser.add_argument(
74 |         '--phase', default='train', help='must be train or test')
75 |     parser.add_argument(
76 |         '--save-score',
77 |         type=str2bool,
78 |         default=False,
79 |         help='if true, the classification score will be stored')
80 | 
81 |     # visualize and debug
82 |     parser.add_argument(
83 |         '--seed', type=int, default=1, help='random seed for pytorch')
84 |     parser.add_argument(
85 |         '--log-interval',
86 |         type=int,
87 |         default=100,
88 |         help='the interval for printing messages (#iteration)')
89 |     parser.add_argument(
90 |         '--save-interval',
91 |         type=int,
92 |         default=2,
93 |         help='the interval for storing models 
(#epoch)')
94 |     parser.add_argument(
95 |         '--eval-interval',
96 |         type=int,
97 |         default=5,
98 |         help='the interval for evaluating models (#epoch)')
99 |     parser.add_argument(
100 |         '--print-log',
101 |         type=str2bool,
102 |         default=True,
103 |         help='print logging or not')
104 |     parser.add_argument(
105 |         '--show-topk',
106 |         type=int,
107 |         default=[1, 5],
108 |         nargs='+',
109 |         help='which Top K accuracy will be shown')
110 | 
111 |     # feeder
112 |     parser.add_argument(
113 |         '--feeder', default='feeder.feeder', help='data loader will be used')
114 |     parser.add_argument(
115 |         '--num-worker',
116 |         type=int,
117 |         default=32,
118 |         help='the number of worker for data loader')
119 |     parser.add_argument(
120 |         '--train-feeder-args',
121 |         default=dict(),
122 |         help='the arguments of data loader for training')
123 |     parser.add_argument(
124 |         '--test-feeder-args',
125 |         default=dict(),
126 |         help='the arguments of data loader for test')
127 | 
128 |     # model
129 |     parser.add_argument('--model', default=None, help='the model will be used')
130 |     parser.add_argument(
131 |         '--model-args',
132 |         type=dict,
133 |         default=dict(),
134 |         help='the arguments of model')
135 |     parser.add_argument(
136 |         '--weights',
137 |         default=None,
138 |         help='the weights for network initialization')
139 |     parser.add_argument(
140 |         '--ignore-weights',
141 |         type=str,
142 |         default=[],
143 |         nargs='+',
144 |         help='the name of weights which will be ignored in the initialization')
145 | 
146 |     # optim
147 |     parser.add_argument(
148 |         '--base-lr', type=float, default=0.01, help='initial learning rate')
149 |     parser.add_argument(
150 |         '--step',
151 |         type=int,
152 |         default=[20, 40, 60],
153 |         nargs='+',
154 |         help='the epochs where the optimizer reduces the learning rate')
155 |     parser.add_argument(
156 |         '--device',
157 |         type=int,
158 |         default=0,
159 |         nargs='+',
160 |         help='the indexes of GPUs for training or testing')
161 |     parser.add_argument('--optimizer', default='SGD', help='type of optimizer')
162 |     parser.add_argument(
163 |         '--nesterov', type=str2bool, default=False, help='use nesterov or not')
164 |     parser.add_argument(
165 |         '--batch-size', type=int, default=256, help='training batch size')
166 |     parser.add_argument(
167 |         '--test-batch-size', type=int, default=256, help='test batch size')
168 |     parser.add_argument(
169 |         '--start-epoch',
170 |         type=int,
171 |         default=0,
172 |         help='the epoch from which to start training')
173 |     parser.add_argument(
174 |         '--num-epoch',
175 |         type=int,
176 |         default=80,
177 |         help='the epoch at which to stop training')
178 |     parser.add_argument(
179 |         '--weight-decay',
180 |         type=float,
181 |         default=0.0005,
182 |         help='weight decay for optimizer')
183 |     parser.add_argument('--only_train_part', default=False)
184 |     parser.add_argument('--only_train_epoch', default=0)
185 |     parser.add_argument('--warm_up_epoch', default=0)
186 |     return parser
187 | 
188 | 
189 | class Processor():
190 |     """
191 |         Processor for Skeleton-based Action Recognition
192 |     """
193 | 
194 |     def __init__(self, arg):
195 |         self.arg = arg
196 |         self.save_arg()
197 |         if arg.phase == 'train':
198 |             if not arg.train_feeder_args['debug']:
199 |                 if os.path.isdir(arg.model_saved_name):
200 |                     print('log_dir: ', arg.model_saved_name, 'already exist')
201 |                     answer = input('delete it? 
y/n:')
202 |                     if answer == 'y':
203 |                         shutil.rmtree(arg.model_saved_name)
204 |                         print('Dir removed: ', arg.model_saved_name)
205 |                         input('Refresh the tensorboard page, then press Enter to continue')
206 |                     else:
207 |                         print('Dir not removed: ', arg.model_saved_name)
208 |                 self.train_writer = SummaryWriter(os.path.join(arg.model_saved_name, 'train'), 'train')
209 |                 self.val_writer = SummaryWriter(os.path.join(arg.model_saved_name, 'val'), 'val')
210 |             else:
211 |                 self.train_writer = self.val_writer = SummaryWriter(os.path.join(arg.model_saved_name, 'test'), 'test')
212 |         self.global_step = 0
213 |         self.load_model()
214 |         self.load_optimizer()
215 |         self.load_data()
216 |         self.lr = self.arg.base_lr
217 |         self.best_acc = 0
218 | 
219 |     def load_data(self):
220 |         Feeder = import_class(self.arg.feeder)
221 |         self.data_loader = dict()
222 |         if self.arg.phase == 'train':
223 |             self.data_loader['train'] = torch.utils.data.DataLoader(
224 |                 dataset=Feeder(**self.arg.train_feeder_args),
225 |                 batch_size=self.arg.batch_size,
226 |                 shuffle=True,
227 |                 num_workers=self.arg.num_worker,
228 |                 drop_last=True,
229 |                 worker_init_fn=init_seed)
230 |         self.data_loader['test'] = torch.utils.data.DataLoader(
231 |             dataset=Feeder(**self.arg.test_feeder_args),
232 |             batch_size=self.arg.test_batch_size,
233 |             shuffle=False,
234 |             num_workers=self.arg.num_worker,
235 |             drop_last=False,
236 |             worker_init_fn=init_seed)
237 | 
238 |     def load_model(self):
239 |         output_device = self.arg.device[0] if type(self.arg.device) is list else self.arg.device
240 |         self.output_device = output_device
241 |         Model = import_class(self.arg.model)
242 |         shutil.copy2(inspect.getfile(Model), self.arg.work_dir)
243 |         print(Model)
244 |         self.model = Model(**self.arg.model_args).cuda(output_device)
245 |         print(self.model)
246 |         self.loss = nn.CrossEntropyLoss().cuda(output_device)
247 | 
248 |         if self.arg.weights:
249 |             self.global_step = int(self.arg.weights[:-3].split('-')[-1])
250 |             self.print_log('Load weights from {}.'.format(self.arg.weights))
251 |             if '.pkl' in self.arg.weights:
252 |                 with open(self.arg.weights, 'r') as f:
253 |                     weights = pickle.load(f)
254 |             else:
255 |                 weights = torch.load(self.arg.weights)
256 | 
257 |             weights = OrderedDict(
258 |                 [[k.split('module.')[-1],
259 |                   v.cuda(output_device)] for k, v in weights.items()])
260 | 
261 |             keys = list(weights.keys())
262 |             for w in self.arg.ignore_weights:
263 |                 for key in keys:
264 |                     if w in key:
265 |                         if weights.pop(key, None) is not None:
266 |                             self.print_log('Successfully removed weights: {}.'.format(key))
267 |                         else:
268 |                             self.print_log('Could not remove weights: {}.'.format(key))
269 | 
270 |             try:
271 |                 self.model.load_state_dict(weights)
272 |             except:
273 |                 state = self.model.state_dict()
274 |                 diff = list(set(state.keys()).difference(set(weights.keys())))
275 |                 print('Can not find these weights:')
276 |                 for d in diff:
277 |                     print(' ' + d)
278 |                 state.update(weights)
279 |                 self.model.load_state_dict(state)
280 | 
281 |         if type(self.arg.device) is list:
282 |             if len(self.arg.device) > 1:
283 |                 self.model = nn.DataParallel(
284 |                     self.model,
285 |                     device_ids=self.arg.device,
286 |                     output_device=output_device)
287 | 
288 |     def load_optimizer(self):
289 |         if self.arg.optimizer == 'SGD':
290 |             self.optimizer = optim.SGD(
291 |                 self.model.parameters(),
292 |                 lr=self.arg.base_lr,
293 |                 momentum=0.9,
294 |                 nesterov=self.arg.nesterov,
295 |                 weight_decay=self.arg.weight_decay)
296 |         elif self.arg.optimizer == 'Adam':
297 |             self.optimizer = optim.Adam(
298 |                 self.model.parameters(),
299 | 
lr=self.arg.base_lr, 300 | weight_decay=self.arg.weight_decay) 301 | else: 302 | raise ValueError() 303 | 304 | lr_scheduler_pre = optim.lr_scheduler.MultiStepLR( 305 | self.optimizer, milestones=self.arg.step, gamma=0.1) 306 | 307 | self.lr_scheduler = GradualWarmupScheduler(self.optimizer, total_epoch=self.arg.warm_up_epoch, 308 | after_scheduler=lr_scheduler_pre) 309 | self.print_log('using warm up, epoch: {}'.format(self.arg.warm_up_epoch)) 310 | 311 | def save_arg(self): 312 | # save arg 313 | arg_dict = vars(self.arg) 314 | if not os.path.exists(self.arg.work_dir): 315 | os.makedirs(self.arg.work_dir) 316 | with open('{}/config.yaml'.format(self.arg.work_dir), 'w') as f: 317 | yaml.dump(arg_dict, f) 318 | 319 | def adjust_learning_rate(self, epoch): 320 | if self.arg.optimizer == 'SGD' or self.arg.optimizer == 'Adam': 321 | if epoch < self.arg.warm_up_epoch: 322 | lr = self.arg.base_lr * (epoch + 1) / self.arg.warm_up_epoch 323 | else: 324 | lr = self.arg.base_lr * ( 325 | 0.1 ** np.sum(epoch >= np.array(self.arg.step))) 326 | for param_group in self.optimizer.param_groups: 327 | param_group['lr'] = lr 328 | return lr 329 | else: 330 | raise ValueError() 331 | 332 | def print_time(self): 333 | localtime = time.asctime(time.localtime(time.time())) 334 | self.print_log("Local current time : " + localtime) 335 | 336 | def print_log(self, str, print_time=True): 337 | if print_time: 338 | localtime = time.asctime(time.localtime(time.time())) 339 | str = "[ " + localtime + ' ] ' + str 340 | print(str) 341 | if self.arg.print_log: 342 | with open('{}/log.txt'.format(self.arg.work_dir), 'a') as f: 343 | print(str, file=f) 344 | 345 | def record_time(self): 346 | self.cur_time = time.time() 347 | return self.cur_time 348 | 349 | def split_time(self): 350 | split_time = time.time() - self.cur_time 351 | self.record_time() 352 | return split_time 353 | 354 | def train(self, epoch, save_model=False): 355 | self.model.train() 356 | self.print_log('Training epoch: {}'.format(epoch + 1)) 357 | loader = self.data_loader['train'] 358 | self.adjust_learning_rate(epoch) 359 | # for name, param in self.model.named_parameters(): 360 | # self.train_writer.add_histogram(name, param.clone().cpu().data.numpy(), epoch) 361 | loss_value = [] 362 | self.train_writer.add_scalar('epoch', epoch, self.global_step) 363 | self.record_time() 364 | timer = dict(dataloader=0.001, model=0.001, statistics=0.001) 365 | process = tqdm(loader) 366 | if self.arg.only_train_part: 367 | if epoch > self.arg.only_train_epoch: 368 | print('only train part, require grad') 369 | for key, value in self.model.named_parameters(): 370 | if 'PA' in key: 371 | value.requires_grad = True 372 | # print(key + '-require grad') 373 | else: 374 | print('only train part, do not require grad') 375 | for key, value in self.model.named_parameters(): 376 | if 'PA' in key: 377 | value.requires_grad = False 378 | # print(key + '-not require grad') 379 | for batch_idx, (data, label, index) in enumerate(process): 380 | self.global_step += 1 381 | # get data 382 | data = Variable(data.float().cuda(self.output_device), requires_grad=False) 383 | label = Variable(label.long().cuda(self.output_device), requires_grad=False) 384 | timer['dataloader'] += self.split_time() 385 | 386 | # forward 387 | output = self.model(data) 388 | # if batch_idx == 0 and epoch == 0: 389 | # self.train_writer.add_graph(self.model, output) 390 | if isinstance(output, tuple): 391 | output, l1 = output 392 | l1 = l1.mean() 393 | else: 394 | l1 = 0 395 | loss = self.loss(output, 
label) + l1 396 | 397 | # backward 398 | self.optimizer.zero_grad() 399 | loss.backward() 400 | self.optimizer.step() 401 | loss_value.append(loss.data.item()) 402 | timer['model'] += self.split_time() 403 | 404 | value, predict_label = torch.max(output.data, 1) 405 | acc = torch.mean((predict_label == label.data).float()) 406 | self.train_writer.add_scalar('acc', acc, self.global_step) 407 | self.train_writer.add_scalar('loss', loss.data.item(), self.global_step) 408 | self.train_writer.add_scalar('loss_l1', l1, self.global_step) 409 | # self.train_writer.add_scalar('batch_time', process.iterable.last_duration, self.global_step) 410 | 411 | # statistics 412 | self.lr = self.optimizer.param_groups[0]['lr'] 413 | self.train_writer.add_scalar('lr', self.lr, self.global_step) 414 | # if self.global_step % self.arg.log_interval == 0: 415 | # self.print_log( 416 | # '\tBatch({}/{}) done. Loss: {:.4f} lr:{:.6f}'.format( 417 | # batch_idx, len(loader), loss.data[0], lr)) 418 | timer['statistics'] += self.split_time() 419 | 420 | # statistics of time consumption and loss 421 | proportion = { 422 | k: '{:02d}%'.format(int(round(v * 100 / sum(timer.values())))) 423 | for k, v in timer.items() 424 | } 425 | self.print_log( 426 | '\tMean training loss: {:.4f}.'.format(np.mean(loss_value))) 427 | self.print_log( 428 | '\tTime consumption: [Data]{dataloader}, [Network]{model}'.format( 429 | **proportion)) 430 | 431 | if save_model: 432 | state_dict = self.model.state_dict() 433 | weights = OrderedDict([[k.split('module.')[-1], 434 | v.cpu()] for k, v in state_dict.items()]) 435 | 436 | torch.save(weights, self.arg.model_saved_name + '-' + str(epoch) + '-' + str(int(self.global_step)) + '.pt') 437 | 438 | def eval(self, epoch, save_score=False, loader_name=['test'], wrong_file=None, result_file=None): 439 | if wrong_file is not None: 440 | f_w = open(wrong_file, 'w') 441 | if result_file is not None: 442 | f_r = open(result_file, 'w') 443 | self.model.eval() 444 | self.print_log('Eval epoch: {}'.format(epoch + 1)) 445 | for ln in loader_name: 446 | loss_value = [] 447 | score_frag = [] 448 | right_num_total = 0 449 | total_num = 0 450 | loss_total = 0 451 | step = 0 452 | process = tqdm(self.data_loader[ln]) 453 | for batch_idx, (data, label, index) in enumerate(process): 454 | with torch.no_grad(): 455 | data = Variable( 456 | data.float().cuda(self.output_device), 457 | requires_grad=False, 458 | volatile=True) 459 | label = Variable( 460 | label.long().cuda(self.output_device), 461 | requires_grad=False, 462 | volatile=True) 463 | output = self.model(data) 464 | if isinstance(output, tuple): 465 | output, l1 = output 466 | l1 = l1.mean() 467 | else: 468 | l1 = 0 469 | loss = self.loss(output, label) 470 | score_frag.append(output.data.cpu().numpy()) 471 | loss_value.append(loss.data.item()) 472 | 473 | _, predict_label = torch.max(output.data, 1) 474 | step += 1 475 | 476 | if wrong_file is not None or result_file is not None: 477 | predict = list(predict_label.cpu().numpy()) 478 | true = list(label.data.cpu().numpy()) 479 | for i, x in enumerate(predict): 480 | if result_file is not None: 481 | f_r.write(str(x) + ',' + str(true[i]) + '\n') 482 | if x != true[i] and wrong_file is not None: 483 | f_w.write(str(index[i]) + ',' + str(x) + ',' + str(true[i]) + '\n') 484 | score = np.concatenate(score_frag) 485 | loss = np.mean(loss_value) 486 | accuracy = self.data_loader[ln].dataset.top_k(score, 1) 487 | if accuracy > self.best_acc: 488 | self.best_acc = accuracy 489 | # self.lr_scheduler.step(loss) 490 
| print('Accuracy: ', accuracy, ' model: ', self.arg.model_saved_name)
491 |         if self.arg.phase == 'train':
492 |             self.val_writer.add_scalar('loss', loss, self.global_step)
493 |             self.val_writer.add_scalar('loss_l1', l1, self.global_step)
494 |             self.val_writer.add_scalar('acc', accuracy, self.global_step)
495 | 
496 |             score_dict = dict(
497 |                 zip(self.data_loader[ln].dataset.sample_name, score))
498 |             self.print_log('\tMean {} loss of {} batches: {}.'.format(
499 |                 ln, len(self.data_loader[ln]), np.mean(loss_value)))
500 |             for k in self.arg.show_topk:
501 |                 self.print_log('\tTop{}: {:.2f}%'.format(
502 |                     k, 100 * self.data_loader[ln].dataset.top_k(score, k)))
503 | 
504 |             if save_score:
505 |                 with open('{}/epoch{}_{}_score.pkl'.format(
506 |                         self.arg.work_dir, epoch + 1, ln), 'wb') as f:
507 |                     pickle.dump(score_dict, f)
508 | 
509 |     def start(self):
510 |         if self.arg.phase == 'train':
511 |             self.print_log('Parameters:\n{}\n'.format(str(vars(self.arg))))
512 |             self.global_step = self.arg.start_epoch * len(self.data_loader['train'])
513 |             for epoch in range(self.arg.start_epoch, self.arg.num_epoch):
514 |                 if self.lr < 1e-3:
515 |                     break
516 |                 save_model = ((epoch + 1) % self.arg.save_interval == 0) or (
517 |                         epoch + 1 == self.arg.num_epoch)
518 | 
519 |                 self.train(epoch, save_model=save_model)
520 | 
521 |                 self.eval(
522 |                     epoch,
523 |                     save_score=self.arg.save_score,
524 |                     loader_name=['test'])
525 | 
526 |             print('best accuracy: ', self.best_acc, ' model_name: ', self.arg.model_saved_name)
527 | 
528 |         elif self.arg.phase == 'test':
529 |             if not self.arg.test_feeder_args['debug']:
530 |                 wf = self.arg.model_saved_name + '_wrong.txt'
531 |                 rf = self.arg.model_saved_name + '_right.txt'
532 |             else:
533 |                 wf = rf = None
534 |             if self.arg.weights is None:
535 |                 raise ValueError('Please specify --weights.')
536 |             self.arg.print_log = False
537 |             self.print_log('Model: {}.'.format(self.arg.model))
538 |             self.print_log('Weights: {}.'.format(self.arg.weights))
539 |             self.eval(epoch=0, save_score=self.arg.save_score, loader_name=['test'], wrong_file=wf, result_file=rf)
540 |             self.print_log('Done.\n')
541 | 
542 | 
543 | def str2bool(v):
544 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
545 |         return True
546 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
547 |         return False
548 |     else:
549 |         raise argparse.ArgumentTypeError('Boolean value expected.')
550 | 
551 | 
552 | def import_class(name):
553 |     components = name.split('.')
554 |     mod = __import__(components[0])  # import the top-level module
555 |     for comp in components[1:]:
556 |         mod = getattr(mod, comp)
557 |     return mod
558 | 
559 | 
560 | if __name__ == '__main__':
561 |     parser = get_parser()
562 | 
563 |     # load args from the config file
564 |     p = parser.parse_args()
565 |     if p.config is not None:
566 |         with open(p.config, 'r') as f:
567 |             default_arg = yaml.load(f, Loader=yaml.FullLoader)
568 |         key = vars(p).keys()
569 |         for k in default_arg.keys():
570 |             if k not in key:
571 |                 print('WRONG ARG: {}'.format(k))
572 |                 assert (k in key)
573 |         parser.set_defaults(**default_arg)
574 | 
575 |     arg = parser.parse_args()
576 |     init_seed(0)
577 |     processor = Processor(arg)
578 |     processor.start()
579 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import agcn, aagcn 2 | -------------------------------------------------------------------------------- /model/aagcn.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | 8 | 9 | def import_class(name): 10 | components = name.split('.') 11 | mod = __import__(components[0]) 12 | for comp in components[1:]: 13 | mod = getattr(mod, comp) 14 | return mod 15 | 16 | 17 | def conv_branch_init(conv, branches): 18 | weight = conv.weight 19 | n = weight.size(0) 20 | k1 = weight.size(1) 21 | k2 = weight.size(2) 22 | nn.init.normal_(weight, 0, math.sqrt(2. / (n * k1 * k2 * branches))) 23 | nn.init.constant_(conv.bias, 0) 24 | 25 | 26 | def conv_init(conv): 27 | nn.init.kaiming_normal_(conv.weight, mode='fan_out') 28 | nn.init.constant_(conv.bias, 0) 29 | 30 | 31 | def bn_init(bn, scale): 32 | nn.init.constant_(bn.weight, scale) 33 | nn.init.constant_(bn.bias, 0) 34 | 35 | 36 | class unit_tcn(nn.Module): 37 | def __init__(self, in_channels, out_channels, kernel_size=9, stride=1): 38 | super(unit_tcn, self).__init__() 39 | pad = int((kernel_size - 1) / 2) 40 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(kernel_size, 1), padding=(pad, 0), 41 | stride=(stride, 1)) 42 | 43 | self.bn = nn.BatchNorm2d(out_channels) 44 | self.relu = nn.ReLU(inplace=True) 45 | conv_init(self.conv) 46 | bn_init(self.bn, 1) 47 | 48 | def forward(self, x): 49 | x = self.bn(self.conv(x)) 50 | return x 51 | 52 | 53 | class unit_gcn(nn.Module): 54 | def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3, adaptive=True, attention=True): 55 | super(unit_gcn, self).__init__() 56 | inter_channels = out_channels // coff_embedding 57 | self.inter_c = inter_channels 58 | self.out_c = out_channels 59 | self.in_c = in_channels 60 | self.num_subset = num_subset 61 | num_jpts = A.shape[-1] 62 | 63 | self.conv_d = nn.ModuleList() 64 | for i in range(self.num_subset): 65 | self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1)) 66 | 67 | if adaptive: 68 | self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32))) 69 | self.alpha = nn.Parameter(torch.zeros(1)) 70 | # self.beta = nn.Parameter(torch.ones(1)) 71 | # nn.init.constant_(self.PA, 1e-6) 72 | # self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False) 73 | # self.A = self.PA 74 | self.conv_a = nn.ModuleList() 75 | self.conv_b = nn.ModuleList() 76 | for i in range(self.num_subset): 77 | self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1)) 78 | self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1)) 79 | else: 80 | self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False) 81 | self.adaptive = adaptive 82 | 83 | if attention: 84 | # self.beta = nn.Parameter(torch.zeros(1)) 85 | # self.gamma = nn.Parameter(torch.zeros(1)) 86 | # unified attention 87 | # self.Attention = nn.Parameter(torch.ones(num_jpts)) 88 | 89 | # temporal attention 90 | self.conv_ta = nn.Conv1d(out_channels, 1, 9, padding=4) 91 | nn.init.constant_(self.conv_ta.weight, 0) 92 | nn.init.constant_(self.conv_ta.bias, 0) 93 | 94 | # s attention 95 | ker_jpt = num_jpts - 1 if not num_jpts % 2 else num_jpts 96 | pad = (ker_jpt - 1) // 2 97 | self.conv_sa = nn.Conv1d(out_channels, 1, ker_jpt, padding=pad) 98 | nn.init.xavier_normal_(self.conv_sa.weight) 99 | nn.init.constant_(self.conv_sa.bias, 0) 100 | 101 | # channel attention 102 | rr = 2 103 | self.fc1c = 
nn.Linear(out_channels, out_channels // rr)
104 |             self.fc2c = nn.Linear(out_channels // rr, out_channels)
105 |             nn.init.kaiming_normal_(self.fc1c.weight)
106 |             nn.init.constant_(self.fc1c.bias, 0)
107 |             nn.init.constant_(self.fc2c.weight, 0)
108 |             nn.init.constant_(self.fc2c.bias, 0)
109 | 
110 |             # self.bn = nn.BatchNorm2d(out_channels)
111 |             # bn_init(self.bn, 1)
112 |         self.attention = attention
113 | 
114 |         if in_channels != out_channels:
115 |             self.down = nn.Sequential(
116 |                 nn.Conv2d(in_channels, out_channels, 1),
117 |                 nn.BatchNorm2d(out_channels)
118 |             )
119 |         else:
120 |             self.down = lambda x: x
121 | 
122 |         self.bn = nn.BatchNorm2d(out_channels)
123 |         self.soft = nn.Softmax(-2)
124 |         self.tan = nn.Tanh()
125 |         self.sigmoid = nn.Sigmoid()
126 |         self.relu = nn.ReLU(inplace=True)
127 | 
128 |         for m in self.modules():
129 |             if isinstance(m, nn.Conv2d):
130 |                 conv_init(m)
131 |             elif isinstance(m, nn.BatchNorm2d):
132 |                 bn_init(m, 1)
133 |         bn_init(self.bn, 1e-6)
134 |         for i in range(self.num_subset):
135 |             conv_branch_init(self.conv_d[i], self.num_subset)
136 | 
137 |     def forward(self, x):
138 |         N, C, T, V = x.size()
139 | 
140 |         y = None
141 |         if self.adaptive:
142 |             A = self.PA
143 |             # A = A + self.PA
144 |             for i in range(self.num_subset):
145 |                 A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T)
146 |                 A2 = self.conv_b[i](x).view(N, self.inter_c * T, V)
147 |                 A1 = self.tan(torch.matmul(A1, A2) / A1.size(-1))  # N V V
148 |                 A1 = A[i] + A1 * self.alpha
149 |                 A2 = x.view(N, C * T, V)
150 |                 z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))
151 |                 y = z + y if y is not None else z
152 |         else:
153 |             A = self.A.cuda(x.get_device())
154 |             for i in range(self.num_subset):
155 |                 A1 = A[i]
156 |                 A2 = x.view(N, C * T, V)
157 |                 z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))
158 |                 y = z + y if y is not None else z
159 | 
160 |         y = self.bn(y)
161 |         y += self.down(x)
162 |         y = self.relu(y)
163 | 
164 |         if self.attention:
165 |             # spatial attention
166 |             se = y.mean(-2)  # N C V
167 |             se1 = self.sigmoid(self.conv_sa(se))
168 |             y = y * se1.unsqueeze(-2) + y
169 |             # a1 = se1.unsqueeze(-2)
170 | 
171 |             # temporal attention
172 |             se = y.mean(-1)
173 |             se1 = self.sigmoid(self.conv_ta(se))
174 |             y = y * se1.unsqueeze(-1) + y
175 |             # a2 = se1.unsqueeze(-1)
176 | 
177 |             # channel attention
178 |             se = y.mean(-1).mean(-1)
179 |             se1 = self.relu(self.fc1c(se))
180 |             se2 = self.sigmoid(self.fc2c(se1))
181 |             y = y * se2.unsqueeze(-1).unsqueeze(-1) + y
182 |             # a3 = se2.unsqueeze(-1).unsqueeze(-1)
183 | 
184 |             # unified attention
185 |             # y = y * self.Attention + y
186 |             # y = y + y * ((a2 + a3) / 2)
187 |             # y = self.bn(y)
188 |         return y
189 | 
190 | 
191 | class TCN_GCN_unit(nn.Module):
192 |     def __init__(self, in_channels, out_channels, A, stride=1, residual=True, adaptive=True, attention=True):
193 |         super(TCN_GCN_unit, self).__init__()
194 |         self.gcn1 = unit_gcn(in_channels, out_channels, A, adaptive=adaptive, attention=attention)
195 |         self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride)
196 |         self.relu = nn.ReLU(inplace=True)
197 |         # if attention:
198 |         #     self.alpha = nn.Parameter(torch.zeros(1))
199 |         #     self.beta = nn.Parameter(torch.ones(1))
200 |         # temporal attention
201 |         # self.conv_ta1 = nn.Conv1d(out_channels, out_channels//rt, 9, padding=4)
202 |         # self.bn = nn.BatchNorm2d(out_channels)
203 |         # bn_init(self.bn, 1)
204 |         # self.conv_ta2 = nn.Conv1d(out_channels, 1, 9, padding=4)
205 |         # nn.init.kaiming_normal_(self.conv_ta1.weight)
206 |         # 
nn.init.constant_(self.conv_ta1.bias, 0) 207 | # nn.init.constant_(self.conv_ta2.weight, 0) 208 | # nn.init.constant_(self.conv_ta2.bias, 0) 209 | 210 | # rt = 4 211 | # self.inter_c = out_channels // rt 212 | # self.conv_ta1 = nn.Conv2d(out_channels, out_channels // rt, 1) 213 | # self.conv_ta2 = nn.Conv2d(out_channels, out_channels // rt, 1) 214 | # nn.init.constant_(self.conv_ta1.weight, 0) 215 | # nn.init.constant_(self.conv_ta1.bias, 0) 216 | # nn.init.constant_(self.conv_ta2.weight, 0) 217 | # nn.init.constant_(self.conv_ta2.bias, 0) 218 | # s attention 219 | # num_jpts = A.shape[-1] 220 | # ker_jpt = num_jpts - 1 if not num_jpts % 2 else num_jpts 221 | # pad = (ker_jpt - 1) // 2 222 | # self.conv_sa = nn.Conv1d(out_channels, 1, ker_jpt, padding=pad) 223 | # nn.init.constant_(self.conv_sa.weight, 0) 224 | # nn.init.constant_(self.conv_sa.bias, 0) 225 | 226 | # channel attention 227 | # rr = 16 228 | # self.fc1c = nn.Linear(out_channels, out_channels // rr) 229 | # self.fc2c = nn.Linear(out_channels // rr, out_channels) 230 | # nn.init.kaiming_normal_(self.fc1c.weight) 231 | # nn.init.constant_(self.fc1c.bias, 0) 232 | # nn.init.constant_(self.fc2c.weight, 0) 233 | # nn.init.constant_(self.fc2c.bias, 0) 234 | # 235 | # self.softmax = nn.Softmax(-2) 236 | # self.sigmoid = nn.Sigmoid() 237 | self.attention = attention 238 | 239 | if not residual: 240 | self.residual = lambda x: 0 241 | 242 | elif (in_channels == out_channels) and (stride == 1): 243 | self.residual = lambda x: x 244 | 245 | else: 246 | self.residual = unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride) 247 | 248 | def forward(self, x): 249 | if self.attention: 250 | y = self.relu(self.tcn1(self.gcn1(x)) + self.residual(x)) 251 | 252 | # spatial attention 253 | # se = y.mean(-2) # N C V 254 | # se1 = self.sigmoid(self.conv_sa(se)) 255 | # y = y * se1.unsqueeze(-2) + y 256 | # a1 = se1.unsqueeze(-2) 257 | 258 | # temporal attention 259 | # se = y.mean(-1) # N C T 260 | # # se1 = self.relu(self.bn(self.conv_ta1(se))) 261 | # se2 = self.sigmoid(self.conv_ta2(se)) 262 | # # y = y * se1.unsqueeze(-1) + y 263 | # a2 = se2.unsqueeze(-1) 264 | 265 | # se = y # NCTV 266 | # N, C, T, V = y.shape 267 | # se1 = self.conv_ta1(se).permute(0, 2, 1, 3).contiguous().view(N, T, self.inter_c * V) # NTCV 268 | # se2 = self.conv_ta2(se).permute(0, 1, 3, 2).contiguous().view(N, self.inter_c * V, T) # NCVT 269 | # a2 = self.softmax(torch.matmul(se1, se2) / np.sqrt(se1.size(-1))) # N T T 270 | # y = torch.matmul(y.permute(0, 1, 3, 2).contiguous().view(N, C * V, T), a2) \ 271 | # .view(N, C, V, T).permute(0, 1, 3, 2) * self.alpha + y 272 | 273 | # channel attention 274 | # se = y.mean(-1).mean(-1) 275 | # se1 = self.relu(self.fc1c(se)) 276 | # se2 = self.sigmoid(self.fc2c(se1)) 277 | # # y = y * se2.unsqueeze(-1).unsqueeze(-1) + y 278 | # a3 = se2.unsqueeze(-1).unsqueeze(-1) 279 | # 280 | # y = y * ((a2 + a3) / 2) + y 281 | # y = self.bn(y) 282 | else: 283 | y = self.relu(self.tcn1(self.gcn1(x)) + self.residual(x)) 284 | return y 285 | 286 | 287 | class Model(nn.Module): 288 | def __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=dict(), in_channels=3, 289 | drop_out=0, adaptive=True, attention=True): 290 | super(Model, self).__init__() 291 | 292 | if graph is None: 293 | raise ValueError() 294 | else: 295 | Graph = import_class(graph) 296 | self.graph = Graph(**graph_args) 297 | 298 | A = self.graph.A 299 | self.num_class = num_class 300 | 301 | self.data_bn = nn.BatchNorm1d(num_person * in_channels * 
class Model(nn.Module):
    def __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=dict(), in_channels=3,
                 drop_out=0, adaptive=True, attention=True):
        super(Model, self).__init__()

        if graph is None:
            raise ValueError('graph must name a graph class to import, e.g. "graph.ntu_rgb_d.Graph"')
        else:
            Graph = import_class(graph)
            self.graph = Graph(**graph_args)

        A = self.graph.A
        self.num_class = num_class

        self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point)

        # use the configured in_channels rather than a hard-coded 3
        self.l1 = TCN_GCN_unit(in_channels, 64, A, residual=False, adaptive=adaptive, attention=attention)
        self.l2 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)
        self.l3 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)
        self.l4 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)
        self.l5 = TCN_GCN_unit(64, 128, A, stride=2, adaptive=adaptive, attention=attention)
        self.l6 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)
        self.l7 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)
        self.l8 = TCN_GCN_unit(128, 256, A, stride=2, adaptive=adaptive, attention=attention)
        self.l9 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)
        self.l10 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)

        self.fc = nn.Linear(256, num_class)
        nn.init.normal_(self.fc.weight, 0, math.sqrt(2. / num_class))
        bn_init(self.data_bn, 1)
        if drop_out:
            self.drop_out = nn.Dropout(drop_out)
        else:
            self.drop_out = lambda x: x

    def forward(self, x):
        N, C, T, V, M = x.size()

        x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)
        x = self.data_bn(x)
        x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V)

        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        x = self.l4(x)
        x = self.l5(x)
        x = self.l6(x)
        x = self.l7(x)
        x = self.l8(x)
        x = self.l9(x)
        x = self.l10(x)

        # x is now (N*M, C, T, V); average over time/joints, then over bodies
        c_new = x.size(1)
        x = x.view(N, M, c_new, -1)
        x = x.mean(3).mean(1)
        x = self.drop_out(x)

        return self.fc(x)
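
# A minimal smoke test, assuming the repository root is on PYTHONPATH so
# that 'graph.ntu_rgb_d.Graph' (see graph/ntu_rgb_d.py) resolves, and that
# a CUDA device is available, as the rest of this codebase assumes.
# Input layout is (N, C, T, V, M).
if __name__ == '__main__':
    model = Model(num_class=60, num_point=25, num_person=2,
                  graph='graph.ntu_rgb_d.Graph').cuda()
    x = torch.randn(2, 3, 64, 25, 2).cuda()  # 2 clips, 64 frames, 25 joints, 2 bodies
    print(model(x).shape)  # expected: torch.Size([2, 60])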
--------------------------------------------------------------------------------
/model/agcn.py:
--------------------------------------------------------------------------------
import math

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable  # no-op wrapper in PyTorch >= 0.4


def import_class(name):
    components = name.split('.')
    mod = __import__(components[0])
    for comp in components[1:]:
        mod = getattr(mod, comp)
    return mod


def conv_branch_init(conv, branches):
    # He-style init, scaled down by the number of parallel subset branches
    weight = conv.weight
    n = weight.size(0)
    k1 = weight.size(1)
    k2 = weight.size(2)
    nn.init.normal_(weight, 0, math.sqrt(2. / (n * k1 * k2 * branches)))
    nn.init.constant_(conv.bias, 0)


def conv_init(conv):
    nn.init.kaiming_normal_(conv.weight, mode='fan_out')
    nn.init.constant_(conv.bias, 0)


def bn_init(bn, scale):
    nn.init.constant_(bn.weight, scale)
    nn.init.constant_(bn.bias, 0)


class unit_tcn(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=9, stride=1):
        super(unit_tcn, self).__init__()
        pad = int((kernel_size - 1) / 2)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(kernel_size, 1), padding=(pad, 0),
                              stride=(stride, 1))

        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        conv_init(self.conv)
        bn_init(self.bn, 1)

    def forward(self, x):
        x = self.bn(self.conv(x))
        return x


class unit_gcn(nn.Module):
    def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3):
        super(unit_gcn, self).__init__()
        inter_channels = out_channels // coff_embedding
        self.inter_c = inter_channels
        self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32)))
        nn.init.constant_(self.PA, 1e-6)
        self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False)
        self.num_subset = num_subset

        self.conv_a = nn.ModuleList()
        self.conv_b = nn.ModuleList()
        self.conv_d = nn.ModuleList()
        for i in range(self.num_subset):
            self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1))
            self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1))
            self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1))

        if in_channels != out_channels:
            self.down = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.down = lambda x: x

        self.bn = nn.BatchNorm2d(out_channels)
        self.soft = nn.Softmax(-2)
        self.relu = nn.ReLU()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                conv_init(m)
            elif isinstance(m, nn.BatchNorm2d):
                bn_init(m, 1)
        bn_init(self.bn, 1e-6)
        for i in range(self.num_subset):
            conv_branch_init(self.conv_d[i], self.num_subset)

    def forward(self, x):
        N, C, T, V = x.size()
        A = self.A.to(x.device)  # keep the fixed graph on the input's device
        A = A + self.PA

        y = None
        for i in range(self.num_subset):
            # data-dependent graph: similarity between embedded joint features
            A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T)
            A2 = self.conv_b[i](x).view(N, self.inter_c * T, V)
            A1 = self.soft(torch.matmul(A1, A2) / A1.size(-1))  # N V V
            A1 = A1 + A[i]
            A2 = x.view(N, C * T, V)
            z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))
            y = z + y if y is not None else z

        y = self.bn(y)
        y += self.down(x)
        return self.relu(y)


class TCN_GCN_unit(nn.Module):
    def __init__(self, in_channels, out_channels, A, stride=1, residual=True):
        super(TCN_GCN_unit, self).__init__()
        self.gcn1 = unit_gcn(in_channels, out_channels, A)
        self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride)
        self.relu = nn.ReLU()
        if not residual:
            self.residual = lambda x: 0
        elif (in_channels == out_channels) and (stride == 1):
            self.residual = lambda x: x
        else:
            self.residual = unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride)

    def forward(self, x):
        x = self.tcn1(self.gcn1(x)) + self.residual(x)
        return self.relu(x)
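
# Shape bookkeeping for Model.forward below (N clips, C coordinates,
# T frames, V joints, M bodies):
#
#   (N, C, T, V, M) --permute(0, 4, 3, 1, 2)--> (N, M, V, C, T)
#                   --view--> (N, M*V*C, T)        # BatchNorm1d over the M*V*C axis
#                   --view--> (N, M, V, C, T)
#                   --permute(0, 1, 3, 4, 2)--> (N, M, C, T, V)
#                   --view--> (N*M, C, T, V)       # each body becomes its own sample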
class Model(nn.Module):
    def __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=dict(), in_channels=3):
        super(Model, self).__init__()

        if graph is None:
            raise ValueError('graph must name a graph class to import, e.g. "graph.ntu_rgb_d.Graph"')
        else:
            Graph = import_class(graph)
            self.graph = Graph(**graph_args)

        A = self.graph.A
        self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point)

        # use the configured in_channels rather than a hard-coded 3
        self.l1 = TCN_GCN_unit(in_channels, 64, A, residual=False)
        self.l2 = TCN_GCN_unit(64, 64, A)
        self.l3 = TCN_GCN_unit(64, 64, A)
        self.l4 = TCN_GCN_unit(64, 64, A)
        self.l5 = TCN_GCN_unit(64, 128, A, stride=2)
        self.l6 = TCN_GCN_unit(128, 128, A)
        self.l7 = TCN_GCN_unit(128, 128, A)
        self.l8 = TCN_GCN_unit(128, 256, A, stride=2)
        self.l9 = TCN_GCN_unit(256, 256, A)
        self.l10 = TCN_GCN_unit(256, 256, A)

        self.fc = nn.Linear(256, num_class)
        nn.init.normal_(self.fc.weight, 0, math.sqrt(2. / num_class))
        bn_init(self.data_bn, 1)

    def forward(self, x):
        N, C, T, V, M = x.size()

        x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)
        x = self.data_bn(x)
        x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V)

        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        x = self.l4(x)
        x = self.l5(x)
        x = self.l6(x)
        x = self.l7(x)
        x = self.l8(x)
        x = self.l9(x)
        x = self.l10(x)

        # x is now (N*M, C, T, V); average over time/joints, then over bodies
        c_new = x.size(1)
        x = x.view(N, M, c_new, -1)
        x = x.mean(3).mean(1)

        return self.fc(x)
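
# A minimal smoke test, mirroring the one in aagcn.py; again this assumes
# the repository root is on PYTHONPATH so 'graph.ntu_rgb_d.Graph' resolves.
# unit_gcn keeps the graph on the input's device, so this also runs on CPU.
if __name__ == '__main__':
    model = Model(num_class=60, num_point=25, num_person=2,
                  graph='graph.ntu_rgb_d.Graph')
    x = torch.randn(2, 3, 64, 25, 2)  # N, C, T, V, M
    print(model(x).shape)  # expected: torch.Size([2, 60])
--------------------------------------------------------------------------------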