├── .directory
├── .gitignore
├── .idea
│   ├── encodings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── two-stream-action-recognition.iml
│   └── vcs.xml
├── Action Recognition Walkthrough.ipynb
├── LICENSE
├── Live_Demo_Two_steam_net.ipynb
├── UCF_list
│   ├── classInd.txt
│   ├── testlist01.txt
│   ├── testlist02.txt
│   ├── testlist03.txt
│   ├── trainlist01.txt
│   ├── trainlist02.txt
│   └── trainlist03.txt
├── average_fusion_demo.py
├── configs
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   └── spatial_configs.cpython-36.pyc
│   ├── motion_configs.py
│   └── spatial_configs.py
├── evaluate_streams.py
├── evaluation
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   └── evaluation.cpython-36.pyc
│   └── evaluation.py
├── frame_dataloader
│   ├── UCF_splitting_kernel.py
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   ├── motion_dataloader.cpython-36.pyc
│   │   ├── spatial_dataloader.cpython-36.pyc
│   │   ├── visual_motion_feature_dataloader.cpython-36.pyc
│   │   └── visual_spatial_feature_dataloader.cpython-36.pyc
│   ├── dic
│   │   └── frame_count.pickle
│   ├── helpers.py
│   ├── motion_dataloader.py
│   ├── spatial_dataloader.py
│   ├── visual_motion_feature_dataloader.py
│   └── visual_spatial_feature_dataloader.py
├── generate_motion_feature_dataset.py
├── generate_spatial_feature_dataset.py
├── models
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   └── spatial_models.cpython-36.pyc
│   ├── motion_models.py
│   └── spatial_models.py
├── motion_trainer.py
├── readme.md
├── recurrent_fusion_trainer.py
├── spatial_trainer.py
├── testing video samples
│   ├── v_Archery_g02_c02.avi
│   ├── v_BabyCrawling_g18_c06.avi
│   ├── v_BabyCrawling_g19_c01.avi
│   ├── v_BalanceBeam_g08_c03.avi
│   ├── v_BalanceBeam_g13_c05.avi
│   ├── v_BasketballDunk_g22_c04.avi
│   ├── v_BenchPress_g01_c03.avi
│   ├── v_Biking_g01_c04.avi
│   ├── v_Biking_g10_c02.avi
│   ├── v_Biking_g19_c01.avi
│   ├── v_Biking_g20_c06.avi
│   ├── v_Billiards_g15_c01.avi
│   ├── v_BlowDryHair_g07_c02.avi
│   ├── v_BlowDryHair_g13_c03.avi
│   ├── v_BodyWeightSquats_g01_c03.avi
│   ├── v_BodyWeightSquats_g04_c03.avi
│   ├── v_Bowling_g22_c04.avi
│   ├── v_BoxingPunchingBag_g01_c01.avi
│   ├── v_BoxingPunchingBag_g18_c03.avi
│   ├── v_BoxingSpeedBag_g04_c04.avi
│   ├── v_BoxingSpeedBag_g09_c01.avi
│   ├── v_BoxingSpeedBag_g12_c04.avi
│   ├── v_BoxingSpeedBag_g23_c04.avi
│   ├── v_BreastStroke_g03_c03.avi
│   ├── v_BrushingTeeth_g17_c02.avi
│   ├── v_BrushingTeeth_g20_c03.avi
│   ├── v_CliffDiving_g02_c03.avi
│   ├── v_CricketBowling_g02_c01.avi
│   ├── v_CuttingInKitchen_g20_c04.avi
│   ├── v_CuttingInKitchen_g25_c05.avi
│   ├── v_Diving_g02_c02.avi
│   ├── v_Diving_g03_c07.avi
│   ├── v_Diving_g04_c04.avi
│   ├── v_Diving_g16_c04.avi
│   ├── v_Diving_g20_c04.avi
│   ├── v_Fencing_g15_c01.avi
│   ├── v_Fencing_g15_c04.avi
│   ├── v_FieldHockeyPenalty_g11_c03.avi
│   ├── v_FieldHockeyPenalty_g13_c03.avi
│   ├── v_FrontCrawl_g23_c04.avi
│   ├── v_Haircut_g07_c01.avi
│   ├── v_HammerThrow_g10_c03.avi
│   ├── v_HammerThrow_g23_c05.avi
│   ├── v_Hammering_g12_c03.avi
│   ├── v_Hammering_g17_c05.avi
│   ├── v_HighJump_g02_c01.avi
│   ├── v_HighJump_g19_c05.avi
│   ├── v_HorseRace_g24_c05.avi
│   ├── v_JavelinThrow_g05_c05.avi
│   ├── v_JavelinThrow_g21_c03.avi
│   ├── v_JavelinThrow_g22_c01.avi
│   ├── v_JavelinThrow_g23_c04.avi
│   ├── v_JavelinThrow_g24_c01.avi
│   ├── v_Kayaking_g12_c03.avi
│   ├── v_Knitting_g20_c01.avi
│   ├── v_LongJump_g04_c03.avi
│   ├── v_LongJump_g15_c02.avi
│   ├── v_LongJump_g15_c03.avi
│   ├── v_MoppingFloor_g03_c03.avi
│   ├── v_PizzaTossing_g01_c04.avi
│   ├── v_PizzaTossing_g14_c04.avi
│   ├── v_PizzaTossing_g18_c01.avi
│   ├── v_PlayingCello_g02_c03.avi
│   ├── v_PlayingDaf_g10_c01.avi
│   ├── v_PlayingDhol_g17_c06.avi
│   ├── v_PlayingFlute_g05_c02.avi
│   ├── v_PlayingGuitar_g22_c04.avi
│   ├── v_PlayingTabla_g14_c02.avi
│   ├── v_PoleVault_g04_c02.avi
│   ├── v_PommelHorse_g17_c03.avi
│   ├── v_Punch_g22_c07.avi
│   ├── v_RockClimbingIndoor_g09_c04.avi
│   ├── v_RockClimbingIndoor_g11_c02.avi
│   ├── v_RockClimbingIndoor_g25_c03.avi
│   ├── v_RopeClimbing_g01_c02.avi
│   ├── v_RopeClimbing_g04_c01.avi
│   ├── v_Rowing_g14_c04.avi
│   ├── v_Rowing_g24_c01.avi
│   ├── v_SalsaSpin_g12_c03.avi
│   ├── v_ShavingBeard_g03_c05.avi
│   ├── v_ShavingBeard_g24_c02.avi
│   ├── v_Shotput_g13_c03.avi
│   ├── v_Skiing_g14_c03.avi
│   ├── v_Skijet_g07_c02.avi
│   ├── v_SkyDiving_g05_c04.avi
│   ├── v_SoccerPenalty_g17_c04.avi
│   ├── v_StillRings_g03_c01.avi
│   ├── v_StillRings_g18_c01.avi
│   ├── v_Surfing_g05_c04.avi
│   ├── v_Surfing_g17_c07.avi
│   ├── v_Swing_g14_c04.avi
│   ├── v_TennisSwing_g14_c03.avi
│   ├── v_ThrowDiscus_g02_c04.avi
│   ├── v_Typing_g16_c03.avi
│   ├── v_VolleyballSpiking_g17_c02.avi
│   ├── v_WalkingWithDog_g15_c01.avi
│   ├── v_WallPushups_g01_c04.avi
│   ├── v_WallPushups_g04_c02.avi
│   ├── v_WritingOnBoard_g11_c02.avi
│   └── v_YoYo_g25_c03.avi
├── upload.sh
└── utils
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-36.pyc
    │   ├── drive_manager.cpython-36.pyc
    │   ├── training_utils.cpython-36.pyc
    │   └── zip_manager.cpython-36.pyc
    ├── drive_manager.py
    ├── training_utils.py
    └── zip_manager.py
/.directory:
--------------------------------------------------------------------------------
1 | [Dolphin]
2 | Timestamp=2018,11,14,20,45,31
3 | Version=4
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ./**/__pycache__/
2 |
3 |
4 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
5 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
6 |
7 | .idea/**
8 |
9 | # User-specific stuff
10 | .idea/**/workspace.xml
11 | .idea/**/tasks.xml
12 | .idea/**/usage.statistics.xml
13 | .idea/**/dictionaries
14 | .idea/**/shelf
15 |
16 | # Generated files
17 | .idea/**/contentModel.xml
18 |
19 | # Sensitive or high-churn files
20 | .idea/**/dataSources/
21 | .idea/**/dataSources.ids
22 | .idea/**/dataSources.local.xml
23 | .idea/**/sqlDataSources.xml
24 | .idea/**/dynamic.xml
25 | .idea/**/uiDesigner.xml
26 | .idea/**/dbnavigator.xml
27 |
28 | # Gradle
29 | .idea/**/gradle.xml
30 | .idea/**/libraries
31 |
32 | # Gradle and Maven with auto-import
33 | # When using Gradle or Maven with auto-import, you should exclude module files,
34 | # since they will be recreated, and may cause churn. Uncomment if using
35 | # auto-import.
36 | # .idea/modules.xml
37 | # .idea/*.iml
38 | # .idea/modules
39 | # *.iml
40 | # *.ipr
41 |
42 | # CMake
43 | cmake-build-*/
44 |
45 | # Mongo Explorer plugin
46 | .idea/**/mongoSettings.xml
47 |
48 | # File-based project format
49 | *.iws
50 |
51 | # IntelliJ
52 | out/
53 |
54 | # mpeltonen/sbt-idea plugin
55 | .idea_modules/
56 |
57 | # JIRA plugin
58 | atlassian-ide-plugin.xml
59 |
60 | # Cursive Clojure plugin
61 | .idea/replstate.xml
62 |
63 | # Crashlytics plugin (for Android Studio and IntelliJ)
64 | com_crashlytics_export_strings.xml
65 | crashlytics.properties
66 | crashlytics-build.properties
67 | fabric.properties
68 |
69 | # Editor-based Rest Client
70 | .idea/httpRequests
71 |
72 | # Android studio 3.1+ serialized cache file
73 | .idea/caches/build_file_checksums.ser
74 |
75 |
76 |
77 | # Byte-compiled / optimized / DLL files
78 | __pycache__/
79 | *.py[cod]
80 | *$py.class
81 |
82 | # C extensions
83 | *.so
84 |
85 | # Distribution / packaging
86 | .Python
87 | build/
88 | develop-eggs/
89 | dist/
90 | downloads/
91 | eggs/
92 | .eggs/
93 | lib/
94 | lib64/
95 | parts/
96 | sdist/
97 | var/
98 | wheels/
99 | pip-wheel-metadata/
100 | share/python-wheels/
101 | *.egg-info/
102 | .installed.cfg
103 | *.egg
104 | MANIFEST
105 |
106 | # PyInstaller
107 | # Usually these files are written by a python script from a template
108 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
109 | *.manifest
110 | *.spec
111 |
112 | # Installer logs
113 | pip-log.txt
114 | pip-delete-this-directory.txt
115 |
116 | # Unit test / coverage reports
117 | htmlcov/
118 | .tox/
119 | .nox/
120 | .coverage
121 | .coverage.*
122 | .cache
123 | nosetests.xml
124 | coverage.xml
125 | *.cover
126 | .hypothesis/
127 | .pytest_cache/
128 |
129 | # Translations
130 | *.mo
131 | *.pot
132 |
133 | # Django stuff:
134 | *.log
135 | local_settings.py
136 | db.sqlite3
137 | db.sqlite3-journal
138 |
139 | # Flask stuff:
140 | instance/
141 | .webassets-cache
142 |
143 | # Scrapy stuff:
144 | .scrapy
145 |
146 | # Sphinx documentation
147 | docs/_build/
148 |
149 | # PyBuilder
150 | target/
151 |
152 | # Jupyter Notebook
153 | .ipynb_checkpoints
154 |
155 | # IPython
156 | profile_default/
157 | ipython_config.py
158 |
159 | # pyenv
160 | .python-version
161 |
162 | # pipenv
163 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
164 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
165 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
166 | # install all needed dependencies.
167 | #Pipfile.lock
168 |
169 | # celery beat schedule file
170 | celerybeat-schedule
171 |
172 | # SageMath parsed files
173 | *.sage.py
174 |
175 | # Environments
176 | .env
177 | .venv
178 | env/
179 | venv/
180 | ENV/
181 | env.bak/
182 | venv.bak/
183 |
184 | # Spyder project settings
185 | .spyderproject
186 | .spyproject
187 |
188 | # Rope project settings
189 | .ropeproject
190 |
191 | # mkdocs documentation
192 | /site
193 |
194 | # mypy
195 | .mypy_cache/
196 | .dmypy.json
197 | dmypy.json
198 |
199 | # Pyre type checker
200 | .pyre/
201 |
202 |
203 |
204 |
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/two-stream-action-recognition.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/UCF_list/classInd.txt:
--------------------------------------------------------------------------------
1 | 1 ApplyEyeMakeup
2 | 2 ApplyLipstick
3 | 3 Archery
4 | 4 BabyCrawling
5 | 5 BalanceBeam
6 | 6 BandMarching
7 | 7 BaseballPitch
8 | 8 Basketball
9 | 9 BasketballDunk
10 | 10 BenchPress
11 | 11 Biking
12 | 12 Billiards
13 | 13 BlowDryHair
14 | 14 BlowingCandles
15 | 15 BodyWeightSquats
16 | 16 Bowling
17 | 17 BoxingPunchingBag
18 | 18 BoxingSpeedBag
19 | 19 BreastStroke
20 | 20 BrushingTeeth
21 | 21 CleanAndJerk
22 | 22 CliffDiving
23 | 23 CricketBowling
24 | 24 CricketShot
25 | 25 CuttingInKitchen
26 | 26 Diving
27 | 27 Drumming
28 | 28 Fencing
29 | 29 FieldHockeyPenalty
30 | 30 FloorGymnastics
31 | 31 FrisbeeCatch
32 | 32 FrontCrawl
33 | 33 GolfSwing
34 | 34 Haircut
35 | 35 Hammering
36 | 36 HammerThrow
37 | 37 HandstandPushups
38 | 38 HandstandWalking
39 | 39 HeadMassage
40 | 40 HighJump
41 | 41 HorseRace
42 | 42 HorseRiding
43 | 43 HulaHoop
44 | 44 IceDancing
45 | 45 JavelinThrow
46 | 46 JugglingBalls
47 | 47 JumpingJack
48 | 48 JumpRope
49 | 49 Kayaking
50 | 50 Knitting
51 | 51 LongJump
52 | 52 Lunges
53 | 53 MilitaryParade
54 | 54 Mixing
55 | 55 MoppingFloor
56 | 56 Nunchucks
57 | 57 ParallelBars
58 | 58 PizzaTossing
59 | 59 PlayingCello
60 | 60 PlayingDaf
61 | 61 PlayingDhol
62 | 62 PlayingFlute
63 | 63 PlayingGuitar
64 | 64 PlayingPiano
65 | 65 PlayingSitar
66 | 66 PlayingTabla
67 | 67 PlayingViolin
68 | 68 PoleVault
69 | 69 PommelHorse
70 | 70 PullUps
71 | 71 Punch
72 | 72 PushUps
73 | 73 Rafting
74 | 74 RockClimbingIndoor
75 | 75 RopeClimbing
76 | 76 Rowing
77 | 77 SalsaSpin
78 | 78 ShavingBeard
79 | 79 Shotput
80 | 80 SkateBoarding
81 | 81 Skiing
82 | 82 Skijet
83 | 83 SkyDiving
84 | 84 SoccerJuggling
85 | 85 SoccerPenalty
86 | 86 StillRings
87 | 87 SumoWrestling
88 | 88 Surfing
89 | 89 Swing
90 | 90 TableTennisShot
91 | 91 TaiChi
92 | 92 TennisSwing
93 | 93 ThrowDiscus
94 | 94 TrampolineJumping
95 | 95 Typing
96 | 96 UnevenBars
97 | 97 VolleyballSpiking
98 | 98 WalkingWithDog
99 | 99 WallPushups
100 | 100 WritingOnBoard
101 | 101 YoYo
102 |
--------------------------------------------------------------------------------
/average_fusion_demo.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | This is a demo of average fusion of the two streams' output predictions (the softmax outputs are summed and used as the final score).
6 | Those predictions are obtained from the models trained on Colab.
7 | """
8 | import pickle
9 |
10 | from evaluation.evaluation import video_level_eval
11 | from frame_dataloader import DataUtil
12 |
13 |
14 | def eval_pickles(pickle_files, weights):
15 | if not isinstance(pickle_files, list):
16 | pickle_files = [pickle_files]
17 |
18 | initialized = False
19 | test_video_level_preds = {}
20 | testing_samples_per_video = 0
21 | for index, pickle_file in enumerate(pickle_files):
22 | with open(pickle_file, 'rb') as f:
23 | test_video_level_preds_, testing_samples_per_video_ = pickle.load(f)
24 | if initialized:
25 | if testing_samples_per_video_ != testing_samples_per_video or len(test_video_level_preds) != len(test_video_level_preds_) or set(test_video_level_preds.keys()) != set(test_video_level_preds_.keys()):
26 | print("Pickles doesn't match")
27 | return
28 | else:
29 | for key in test_video_level_preds:
30 | test_video_level_preds[key] += weights[index] * test_video_level_preds_[key]
31 | else:
32 | initialized = True
33 | test_video_level_preds = test_video_level_preds_
34 | for key in test_video_level_preds_:
35 | test_video_level_preds_[key] *= weights[index]
36 | testing_samples_per_video = testing_samples_per_video_
37 |
38 | for key in test_video_level_preds:
39 | test_video_level_preds[key] /= len(pickle_files)
40 |
41 | data_util = DataUtil(path='./UCF_list/', split='01')
42 | _, test_video_to_label_ = data_util.get_train_test_video_to_label_mapping()
43 |
44 | video_level_loss, video_level_accuracy_1, video_level_accuracy_5 = video_level_eval(test_video_level_preds=test_video_level_preds,
45 | test_video_level_label=test_video_to_label_,
46 | testing_samples_per_video=testing_samples_per_video)
47 |
48 | print("prec@1", video_level_accuracy_1, "prec@5", video_level_accuracy_5, "loss", video_level_loss)
49 |
50 |
51 | if __name__ == '__main__':
52 | # Epoch 10 prec@1 0.86122125 prec@5 0.9698652 loss 0.52952474
53 | eval_pickles("../pickles/mot-xception-adam-5e-05-imnet-0.84140.preds", [1])
54 | eval_pickles("../pickles/spa-xception-adam-5e-05-imnet-0.86122.preds", [1])
55 | print("")
56 | eval_pickles("../pickles/mot-xception-adam-5e-05-imnet-0.84140.preds", [5])
57 | eval_pickles("../pickles/spa-xception-adam-5e-05-imnet-0.86122.preds", [5])
58 | print("")
59 | eval_pickles(["../pickles/mot-xception-adam-5e-05-imnet-0.84140.preds"] * 10, [1] * 10)
60 | eval_pickles(["../pickles/spa-xception-adam-5e-05-imnet-0.86122.preds"] * 10, [1] * 10)
61 | print("")
62 | eval_pickles(["../pickles/mot-xception-adam-5e-05-imnet-0.84192.preds", "../pickles/spa-xception-adam-5e-05-imnet-0.86122.preds"], [1] * 2)
63 | eval_pickles(["../pickles/mot-xception-adam-5e-05-imnet-0.84192.preds", "../pickles/spa-xception-adam-5e-06-imnet-0.85964.preds"], [1] * 2)
64 | eval_pickles(["../pickles/mot-xception-adam-5e-05-imnet-0.84192.preds", "../pickles/spa-xception-adam-5e-06-imnet-0.86016.preds"], [1] * 2)
65 | # eval_model_from_disk("spatial.h5", spatial=True, testing_samples_per_video=19)
66 | # eval_model_from_disk("motion.h5", spatial=False, testing_samples_per_video=19)
67 |
--------------------------------------------------------------------------------
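
For reference, a minimal sketch of the weighted average-fusion step that eval_pickles performs above, written against made-up in-memory prediction dictionaries instead of the pickle files (the video name and random scores are purely illustrative):

import numpy as np

num_actions = 101

# hypothetical per-video accumulated softmax scores from each stream: video_name -> (101,) array
spatial_preds = {"Archery_g02_c02": np.random.rand(num_actions)}
motion_preds = {"Archery_g02_c02": np.random.rand(num_actions)}


def average_fusion(stream_preds, weights):
    """Weight each stream's scores, sum them per video, then average over the number of streams."""
    fused = {}
    for video_name in stream_preds[0]:
        fused[video_name] = sum(weight * preds[video_name] for preds, weight in zip(stream_preds, weights)) / len(stream_preds)
    return fused


fused = average_fusion([spatial_preds, motion_preds], weights=[1, 1])
print(fused["Archery_g02_c02"].argmax())  # index of the fused class prediction for that video
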
/configs/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | """
--------------------------------------------------------------------------------
/configs/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/configs/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/configs/__pycache__/spatial_configs.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/configs/__pycache__/spatial_configs.cpython-36.pyc
--------------------------------------------------------------------------------
/configs/motion_configs.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Configs for the motion trainer.
6 | Comment/uncomment exactly one of these blocks.
7 | This covers hyperparameters for pretrained and from-scratch ResNet/Xception/VGG19/MobileNet.
8 | """
9 | ###############################################################################
10 | """ medium,adam,pretrained,5e-5,resnet """
11 | # is_adam = True
12 | # pretrained = True
13 | # testing_samples_per_video = 19
14 | # lr = 5e-5
15 | # model_name = "resnet" # resnet xception vgg mobilenet
16 | # epochs = 100
17 | # validate_every = 5
18 | # stacked_frames = 10
19 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
20 | ###############################################################################
21 | """ medium,sgd,pretrained,5e-5,resnet """
22 | # is_adam = False
23 | # pretrained = True
24 | # testing_samples_per_video = 19
25 | # lr = 5e-5
26 | # model_name = "resnet" # resnet xception vgg mobilenet
27 | # epochs = 100
28 | # validate_every = 5
29 | # stacked_frames = 10
30 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
31 | ###############################################################################
32 | """ medium,adam,scratch,5e-5,resnet """
33 | # is_adam = True
34 | # pretrained = False
35 | # testing_samples_per_video = 19
36 | # lr = 5e-5
37 | # model_name = "resnet" # resnet xception vgg mobilenet
38 | # epochs = 100
39 | # validate_every = 5
40 | # stacked_frames = 10
41 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
42 | ###############################################################################
43 | """ medium,adam,pretrained,5e-5,xception """
44 | # is_adam = True
45 | # pretrained = True
46 | # testing_samples_per_video = 19
47 | # lr = 5e-5
48 | # model_name = "xception" # resnet xception vgg mobilenet
49 | # epochs = 200
50 | # validate_every = 5
51 | # stacked_frames = 10
52 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
53 | ###############################################################################
54 | """ medium,sgd,pretrained,5e-5,xception """
55 | # is_adam = False
56 | # pretrained = True
57 | # testing_samples_per_video = 19
58 | # lr = 5e-5
59 | # model_name = "xception" # resnet xception vgg mobilenet
60 | # epochs = 100
61 | # validate_every = 5
62 | # stacked_frames = 10
63 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
64 | ###############################################################################
65 | """ medium,adam,pretrained,5e-6,xception"""
66 | # is_adam = True
67 | # pretrained = True
68 | # testing_samples_per_video = 19
69 | # lr = 5e-6
70 | # model_name = "xception" # resnet xception vgg mobilenet
71 | # epochs = 350
72 | # validate_every = 5
73 | # stacked_frames = 10
74 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
75 | ###############################################################################
76 | """ heavy,adam,pretrained,10e-6,xception"""
77 | is_adam = True
78 | pretrained = True
79 | testing_samples_per_video = 19
80 | lr = 10e-6
81 | model_name = "xception" # resnet xception vgg mobilenet
82 | epochs = 350
83 | validate_every = 1
84 | stacked_frames = 10
85 | augmenter_level = 0 # 0 heavy , 1 medium,2 simple
86 | ###############################################################################
87 | """ medium,sgd,pretrained,5e-6,xception"""
88 | # is_adam = False
89 | # pretrained = True
90 | # testing_samples_per_video = 19
91 | # lr = 5e-6
92 | # model_name = "xception" # resnet xception vgg mobilenet
93 | # epochs = 100
94 | # validate_every = 5
95 | # stacked_frames = 10
96 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
97 |
--------------------------------------------------------------------------------
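
The trainers are expected to pick up whichever block is left uncommented simply by importing this module; a minimal sketch of that pattern (hypothetical caller-side code, but the attribute names match the configs above):

# hypothetical trainer-side usage of the active config block
from configs import motion_configs as cfg

optimizer_name = "adam" if cfg.is_adam else "sgd"
print("model:", cfg.model_name, "| optimizer:", optimizer_name, "| lr:", cfg.lr)
print("epochs:", cfg.epochs, "| validate every:", cfg.validate_every, "| stacked flow frames:", cfg.stacked_frames)
print("augmenter level:", cfg.augmenter_level)  # 0 = heavy, 1 = medium, 2 = simple
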
/configs/spatial_configs.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Configs for the spatial trainer.
6 | Comment/uncomment exactly one of these blocks.
7 | This covers hyperparameters for pretrained and from-scratch ResNet/Xception/VGG19/MobileNet.
8 | """
9 | ###############################################################################
10 | """ medium,adam,pretrained,5e-5,resnet 80 ~ 81.2%"""
11 | # is_adam = True
12 | # pretrained = True
13 | # testing_samples_per_video = 19
14 | # lr = 5e-5
15 | # model_name = "resnet" # resnet xception vgg mobilenet
16 | # epochs = 100
17 | # validate_every = 5
18 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
19 | ###############################################################################
20 | """ medium,sgd,pretrained,5e-5,resnet 78.5 ~ 80"""
21 | # is_adam = False
22 | # pretrained = True
23 | # testing_samples_per_video = 19
24 | # lr = 5e-5
25 | # model_name = "resnet" # resnet xception vgg mobilenet
26 | # epochs = 100
27 | # validate_every = 5
28 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
29 | ###############################################################################
30 | """ medium,adam,scratch,5e-5,resnet 0.42215174"""
31 | # is_adam = True
32 | # pretrained = False
33 | # testing_samples_per_video = 19
34 | # lr = 5e-5
35 | # model_name = "resnet" # resnet xception vgg mobilenet
36 | # epochs = 100
37 | # validate_every = 5
38 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
39 | ###############################################################################
40 | """ medium,adam,pretrained,5e-5,xception 86.12%"""
41 | # is_adam = True
42 | # pretrained = True
43 | # testing_samples_per_video = 19
44 | # lr = 5e-5
45 | # model_name = "xception" # resnet xception vgg mobilenet
46 | # epochs = 100
47 | # validate_every = 5
48 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
49 | ###############################################################################
50 | """ medium,sgd,pretrained,5e-5,xception 82%"""
51 | # is_adam = False
52 | # pretrained = True
53 | # testing_samples_per_video = 19
54 | # lr = 5e-5
55 | # model_name = "xception" # resnet xception vgg mobilenet
56 | # epochs = 100
57 | # validate_every = 5
58 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
59 | # ###############################################################################
60 | # """ medium,adam,pretrained,5e-6,xception"""
61 | # is_adam = True
62 | # pretrained = True
63 | # testing_samples_per_video = 19
64 | # lr = 5e-6
65 | # model_name = "xception" # resnet xception vgg mobilenet
66 | # epochs = 175
67 | # validate_every = 5
68 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
69 | # ###############################################################################
70 | """ heavy,adam,pretrained,10e-6,xception"""
71 | is_adam = True
72 | pretrained = True
73 | testing_samples_per_video = 19
74 | lr = 10e-6
75 | model_name = "xception" # resnet xception vgg mobilenet
76 | epochs = 175
77 | validate_every = 1
78 | augmenter_level = 0 # 0 heavy , 1 medium,2 simple
79 | ###############################################################################
80 | """ medium,sgd,pretrained,5e-6,xception"""
81 | # is_adam = False
82 | # pretrained = True
83 | # testing_samples_per_video = 19
84 | # lr = 5e-6
85 | # model_name = "xception" # resnet xception vgg mobilenet
86 | # epochs = 100
87 | # validate_every = 5
88 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
89 | ###############################################################################
90 | """ medium,adam,pretrained,5e-5,vgg"""
91 | # is_adam = True
92 | # pretrained = True
93 | # testing_samples_per_video = 19
94 | # lr = 5e-5
95 | # model_name = "vgg" # resnet xception vgg mobilenet
96 | # epochs = 100
97 | # validate_every = 5
98 | # augmenter_level = 1 # 0 heavy , 1 medium,2 simple
99 |
--------------------------------------------------------------------------------
/evaluate_streams.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Evaluate motion and spatial streams
6 | """
7 | import frame_dataloader
8 | from evaluation import legacy_load_model, get_batch_size
9 | from evaluation.evaluation import *
10 | from utils.drive_manager import DriveManager
11 |
12 | """
13 | Evaluate spatial stream
14 | """
15 | # download
16 | drive_manager = DriveManager("spa-xception-adam-5e-06-imnet")
17 | drive_manager.download_file('1djGzpxAYFvNX-UaQ7ONqDHGgnzc8clBK', "spatial.zip")
18 |
19 | # load into ram
20 | print("Spatial stream")
21 | spatial_model_restored = legacy_load_model(filepath="spatial.h5", custom_objects={'sparse_categorical_cross_entropy_loss': sparse_categorical_cross_entropy_loss, "acc_top_1": acc_top_1, "acc_top_5": acc_top_5})
22 | spatial_model_restored.summary()
23 |
24 | # evaluate
25 | _, spatial_test_loader, test_video_level_label = frame_dataloader.SpatialDataLoader(
26 |     width=int(spatial_model_restored.inputs[0].shape[1]), height=int(spatial_model_restored.inputs[0].shape[2]),
27 |     batch_size=get_batch_size(spatial_model_restored, spatial=True), testing_samples_per_video=19,
28 | ).run()
29 |
30 | video_level_loss, video_level_accuracy_1, video_level_accuracy_5, test_video_level_preds = eval_model(spatial_model_restored, spatial_test_loader, test_video_level_label, 19)
31 | print("Spatial Model validation", "prec@1", video_level_accuracy_1, "prec@5", video_level_accuracy_5, "loss", video_level_loss)
32 |
33 | """
34 | Evaluate motion stream
35 | """
36 | # download
37 | drive_manager = DriveManager("heavy-mot-xception-adam-1e-05-imnet")
38 | drive_manager.download_file('1kvslNL8zmZYaHRmhgAM6-l_pNDDA0EKZ', "motion.zip")  # the id of the zip file containing my network
39 |
40 | # load into ram
41 | print("Motion stream")
42 | motion_model_restored = legacy_load_model(filepath="motion.h5", custom_objects={'sparse_categorical_cross_entropy_loss': sparse_categorical_cross_entropy_loss, "acc_top_1": acc_top_1, "acc_top_5": acc_top_5})
43 | motion_model_restored.summary()
44 |
45 | # evaluate
46 | _, motion_test_loader, test_video_level_label = frame_dataloader.MotionDataLoader(
47 |     width=int(motion_model_restored.inputs[0].shape[1]),
48 |     height=int(motion_model_restored.inputs[0].shape[2]),
49 |     batch_size=get_batch_size(motion_model_restored, spatial=False),
50 |     testing_samples_per_video=19,
51 | ).run()
52 |
53 | video_level_loss, video_level_accuracy_1, video_level_accuracy_5, _ = eval_model(motion_model_restored, motion_test_loader, test_video_level_label, 19)
54 |
55 | print("Motion Model validation", "prec@1", video_level_accuracy_1, "prec@5", video_level_accuracy_5, "loss", video_level_loss)
56 |
--------------------------------------------------------------------------------
/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | This contains helper functions needed by evaluation
6 | """
7 | import json
8 | import logging
9 | import os
10 |
11 | import h5py
12 | from tensorflow.python.keras import optimizers
13 | from tensorflow.python.keras.engine.saving import model_from_config, load_weights_from_hdf5_group
14 |
15 | is_tesla_k80 = os.path.isdir("/content")
16 |
17 |
18 | # from tensorflow.keras.models import load_model # 1.11.1.rc2
19 | # load_model in newer versions of tensorflow doesn't work for me, and I can't reinstall an older tensorflow-gpu with an older CUDA for every Colab machine :DDD
20 | def legacy_load_model(filepath, custom_objects=None, compile=True): # pylint: disable=redefined-builtin
21 | """
22 |     legacy load_model, since my pretrained models couldn't be loaded in newer versions of tensorflow
23 | """
24 | if h5py is None:
25 | raise ImportError('`load_model` requires h5py.')
26 |
27 | if not custom_objects:
28 | custom_objects = {}
29 |
30 | def convert_custom_objects(obj):
31 | if isinstance(obj, list):
32 | deserialized = []
33 | for value in obj:
34 | deserialized.append(convert_custom_objects(value))
35 | return deserialized
36 | if isinstance(obj, dict):
37 | deserialized = {}
38 | for key, value in obj.items():
39 | deserialized[key] = convert_custom_objects(value)
40 | return deserialized
41 | if obj in custom_objects:
42 | return custom_objects[obj]
43 | return obj
44 |
45 | opened_new_file = not isinstance(filepath, h5py.File)
46 | if opened_new_file:
47 | f = h5py.File(filepath, mode='r')
48 | else:
49 | f = filepath
50 |
51 | try:
52 | # instantiate model
53 | model_config = f.attrs.get('model_config')
54 | if model_config is None:
55 | raise ValueError('No model found in config file.')
56 | model_config = json.loads(model_config.decode('utf-8'))
57 | model = model_from_config(model_config, custom_objects=custom_objects)
58 |
59 | # set weights
60 | load_weights_from_hdf5_group(f['model_weights'], model.layers)
61 |
62 | if compile:
63 | # instantiate optimizer
64 | training_config = f.attrs.get('training_config')
65 | if training_config is None:
66 | logging.warning('No training configuration found in save file: '
67 | 'the model was *not* compiled. Compile it manually.')
68 | return model
69 | training_config = json.loads(training_config.decode('utf-8'))
70 | optimizer_config = training_config['optimizer_config']
71 | optimizer = optimizers.deserialize(
72 | optimizer_config, custom_objects=custom_objects)
73 |
74 | # Recover loss functions and metrics.
75 | loss = convert_custom_objects(training_config['loss'])
76 | metrics = convert_custom_objects(training_config['metrics'])
77 | sample_weight_mode = training_config['sample_weight_mode']
78 | loss_weights = training_config['loss_weights']
79 |
80 | # Compile model.
81 | model.compile(
82 | optimizer=optimizer,
83 | loss=loss,
84 | metrics=metrics,
85 | loss_weights=loss_weights,
86 | sample_weight_mode=sample_weight_mode)
87 |
88 | # Set optimizer weights.
89 | if 'optimizer_weights' in f:
90 | # Build train function (to get weight updates).
91 | model._make_train_function()
92 | optimizer_weights_group = f['optimizer_weights']
93 | optimizer_weight_names = [
94 | n.decode('utf8')
95 | for n in optimizer_weights_group.attrs['weight_names']
96 | ]
97 | optimizer_weight_values = [
98 | optimizer_weights_group[n] for n in optimizer_weight_names
99 | ]
100 | try:
101 | model.optimizer.set_weights(optimizer_weight_values)
102 | except ValueError:
103 | logging.warning('Error in loading the saved optimizer '
104 | 'state. As a result, your model is '
105 | 'starting with a freshly initialized '
106 | 'optimizer.')
107 | finally:
108 | if opened_new_file:
109 | f.close()
110 | return model
111 |
112 |
113 | def get_batch_size(model_restored, spatial):
114 | """
115 | Helper function to get batch size per model
116 | """
117 | if spatial:
118 | if model_restored.layers[2].__dict__["_name"] == 'resnet50':
119 | batch_size = 76 if is_tesla_k80 else 48
120 | elif model_restored.layers[2].__dict__["_name"] == 'xception':
121 | batch_size = 24 if is_tesla_k80 else 24
122 | elif model_restored.layers[2].__dict__["_name"] == 'vgg19':
123 | batch_size = 36 if is_tesla_k80 else 36
124 | else:
125 | batch_size = 100 if is_tesla_k80 else 100
126 | else:
127 | if model_restored.layers[2].__dict__["_name"] == 'resnet50':
128 | batch_size = 20 if is_tesla_k80 else 20
129 | else:
130 | batch_size = 18 if is_tesla_k80 else 18
131 |
132 | return batch_size
133 |
--------------------------------------------------------------------------------
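
A hedged usage sketch of the two helpers above, mirroring how evaluate_streams.py restores a saved stream with the custom loss/metrics registered (the checkpoint path here is a placeholder):

from evaluation import legacy_load_model, get_batch_size
from evaluation.evaluation import sparse_categorical_cross_entropy_loss, acc_top_1, acc_top_5

# "spatial.h5" is a placeholder for a checkpoint produced by the trainers in this repo
model = legacy_load_model(filepath="spatial.h5",
                          custom_objects={"sparse_categorical_cross_entropy_loss": sparse_categorical_cross_entropy_loss,
                                          "acc_top_1": acc_top_1,
                                          "acc_top_5": acc_top_5})
batch_size = get_batch_size(model, spatial=True)  # picks a size from the backbone layer name (resnet50/xception/vgg19)
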
/evaluation/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/evaluation/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/evaluation/__pycache__/evaluation.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/evaluation/__pycache__/evaluation.cpython-36.pyc
--------------------------------------------------------------------------------
/evaluation/evaluation.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | This contains helper functions needed to:
6 | evaluate the model while training,
7 | evaluate a model loaded from disk,
8 | evaluate a prediction file in pickle format.
9 | """
10 |
11 | import multiprocessing
12 | import os
13 | from collections import defaultdict
14 |
15 | import numpy as np
16 | import tensorflow as tf
17 | import tqdm
18 | from tensorflow import keras
19 |
20 | """ Global variables for evaluation """
21 | num_actions = 101
22 | workers = min(multiprocessing.cpu_count(), 4)
23 | is_tesla_k80 = os.path.isdir("/content") # this is true if you are on colab :D
24 |
25 | # keras placeholder used for evaluation
26 | video_level_labels_k = keras.backend.placeholder([None, 1], dtype=tf.float32)
27 | video_level_preds_k = keras.backend.placeholder([None, num_actions], dtype=tf.float32)
28 |
29 | # tensors representing top-1 top-5 and cost function in symbolic form
30 | val_loss_op = keras.backend.mean(keras.metrics.sparse_categorical_crossentropy(video_level_labels_k, video_level_preds_k))
31 | acc_top_1_op = keras.backend.mean(keras.metrics.sparse_top_k_categorical_accuracy(video_level_labels_k, video_level_preds_k, k=1))
32 | acc_top_5_op = keras.backend.mean(keras.metrics.sparse_top_k_categorical_accuracy(video_level_labels_k, video_level_preds_k, k=5))
33 |
34 |
35 | def acc_top_5(y_true, y_pred):
36 | """Helper function for top-5 accuracy reported in UCF"""
37 | y_true = keras.backend.cast(y_true, dtype='int32')
38 |
39 | return keras.backend.mean((tf.keras.metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=5)))
40 |
41 |
42 | def acc_top_1(y_true, y_pred):
43 | """Helper function for top-1 accuracy/(traditional accuracy) reported in UCF"""
44 | print(y_true, y_pred)
45 | y_true = keras.backend.cast(y_true, dtype='int32')
46 | return keras.backend.mean((tf.keras.metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)))
47 |
48 |
49 | # compile the model (should be done *after* setting layers to non-trainable)
50 | def sparse_categorical_cross_entropy_loss(y_true, y_pred):
51 | """Custom loss function:I changed it a little bit but observed no difference"""
52 | print(y_true, y_pred)
53 | y_true = keras.backend.cast(y_true, dtype='int32')
54 | return keras.backend.mean(keras.losses.sparse_categorical_crossentropy(y_true, y_pred))
55 |
56 |
57 | def eval_model(model, test_loader, test_video_level_label, testing_samples_per_video):
58 | """
59 | runs a progressor showing my custom validation per epoch, returning the metrics
60 | """
61 | print("loader",len(test_loader))
62 | progress = tqdm.tqdm(test_loader, total=len(test_loader))
63 | test_video_level_preds = defaultdict(lambda: np.zeros((num_actions,)))
64 |
65 | for i,(video_names, sampled_frame) in enumerate(progress): # i don't need frame level labels
66 | if i == len(progress):
67 | break
68 |
69 | frame_preds = model.predict_on_batch(sampled_frame)
70 | _batch_size = frame_preds.shape[0] # last batch wont be batch_size :3
71 |
72 | for video_id in range(_batch_size): # in batch
73 | video_name = video_names[video_id] # ApplyMakeup_g01_c01 for example
74 | test_video_level_preds[video_name] += frame_preds[video_id]
75 |
76 | video_level_loss, video_level_accuracy_1, video_level_accuracy_5 = video_level_eval(test_video_level_preds=test_video_level_preds,
77 | test_video_level_label=test_video_level_label,
78 | testing_samples_per_video=testing_samples_per_video)
79 |
80 | return video_level_loss, video_level_accuracy_1, video_level_accuracy_5, test_video_level_preds
81 |
82 |
83 | def video_level_eval(test_video_level_preds, test_video_level_label, testing_samples_per_video):
84 | """
85 | video level validation applying accuracy scoring top-5 and top-1 using predictions and labels fed as dictionaries
86 | """
87 | video_level_preds_np = np.zeros((len(test_video_level_preds), num_actions)) # each video per 101 class (prediction)
88 | video_level_labels_np = np.zeros((len(test_video_level_preds), 1))
89 |
90 | for index, video_name in enumerate(sorted(test_video_level_preds.keys())): # this should loop on test videos = 3783 videos
91 | video_summed_preds = test_video_level_preds[video_name] / testing_samples_per_video # average on
92 | video_label = test_video_level_label[video_name] # 0 based label
93 |
94 | video_level_preds_np[index, :] = video_summed_preds
95 | video_level_labels_np[index, 0] = video_label
96 |
97 | video_level_loss, video_level_accuracy_1, video_level_accuracy_5 = keras.backend.get_session().run(
98 | [val_loss_op, acc_top_1_op, acc_top_5_op], feed_dict={video_level_labels_k: video_level_labels_np, video_level_preds_k: video_level_preds_np})
99 |
100 | return video_level_loss, video_level_accuracy_1, video_level_accuracy_5
101 |
--------------------------------------------------------------------------------
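
To make the video-level scoring above concrete, a small sketch that feeds video_level_eval two fake videos with random accumulated scores (only the dictionary shapes and the zero-based labels matter; the numbers themselves are meaningless):

import numpy as np

from evaluation.evaluation import video_level_eval, num_actions

testing_samples_per_video = 19
# summed frame-level softmax scores per video, as accumulated by eval_model
fake_preds = {name: np.random.rand(num_actions) * testing_samples_per_video
              for name in ("Archery_g02_c02", "Biking_g01_c04")}
fake_labels = {"Archery_g02_c02": 2, "Biking_g01_c04": 10}  # zero-based labels from classInd.txt

loss, top1, top5 = video_level_eval(test_video_level_preds=fake_preds,
                                    test_video_level_label=fake_labels,
                                    testing_samples_per_video=testing_samples_per_video)
print("prec@1", top1, "prec@5", top5, "loss", loss)
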
/frame_dataloader/UCF_splitting_kernel.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | This contains:
6 | the class responsible for the train-test split (video-to-label mapping) provided by the UCF101 authors.
7 | 
8 | Look at the notes below.
9 | """
10 |
11 | import os
12 | import pickle
13 |
14 |
15 | class DataUtil:
16 | """
17 | Gets video name to label mapping using UCF101 splits
18 | """
19 |
20 | def __init__(self, path, split):
21 | self.path = path
22 | self.split = split
23 |
24 | self.action_to_label = {}
25 | self.init_action_to_label_mapping()
26 |
27 | def init_action_to_label_mapping(self):
28 | with open(os.path.join(self.path, 'classInd.txt')) as f:
29 | class_index_mapping = f.readlines()
30 | class_index_mapping = [x.strip('\n') for x in class_index_mapping]
31 |
32 | for line in class_index_mapping:
33 | label, action = line.split(' ')
34 | self.action_to_label[action] = int(label) - 1 # without v_ or avi(has HandstandPushups) # make it zero based
35 |
36 | def get_train_test_video_to_label_mapping(self):
37 | train_to_label = self.get_video_to_label_mapping("trainlist")
38 | test_to_label = self.get_video_to_label_mapping("testlist")
39 |
40 | return train_to_label, test_to_label
41 |
42 | def get_video_to_label_mapping(self, file):
43 | """warning: trainlist, testlist contains video names called v_HandStandPushups_g16_c03.avi"""
44 | with open(os.path.join(self.path, '{file}{split}.txt'.format(file=file, split=self.split))) as f:
45 | content = f.readlines()
46 | content = [x.strip('\n') for x in content]
47 |
48 | each_video_to_label = {}
49 | for line in content:
50 | video_name = line.split('/', 1)[1] # get video name after /
51 | video_name = video_name.split(' ', 1)[0] # ignore class number 0>1>..> 101 (only trainlist)
52 | video_name = video_name.split('_', 1)[1] # remove v_
53 | video_name = video_name.split('.', 1)[0] # remove .avi
54 | video_name = video_name.replace("HandStandPushups", "HandstandPushups") # look at the warning <
55 | label = self.action_to_label[line.split('/')[0]] # get label index from video_name.. [without v_ or avi get (has HandstandPushups)]
56 | each_video_to_label[video_name] = label # zero based now
57 | return each_video_to_label
58 |
59 | def get_video_frame_count(self):
60 | with open(os.path.join(self.path, "..", "frame_dataloader/dic/frame_count.pickle"), 'rb') as file:
61 | old_video_frame_count = pickle.load(file) # has HandstandPushups_g25_c01 for example (small)
62 |
63 | video_frame_count = {}
64 | for old_video_name in old_video_frame_count:
65 | new_video_name = old_video_name.split('_', 1)[1].split('.', 1)[0] # remove v_ and .avi
66 | video_frame_count[new_video_name] = int(old_video_frame_count[old_video_name]) # name without v_ or .avi (has HandstandPushups)
67 |
68 | return video_frame_count
69 |
70 |
71 | if __name__ == '__main__':
72 | path = '../UCF_list/'
73 | split = '01'
74 | data_util = DataUtil(path=path, split=split)
75 | train_video, test_video = data_util.get_train_test_video_to_label_mapping()
76 | print(len(train_video), len(test_video))
77 |
78 | frames = data_util.get_video_frame_count()
79 |
80 | frame_test, frame_train = {}, {}
81 |
82 | test, train, other = 0, 0, 0
83 | for key, value in frames.items():
84 | if key in test_video:
85 | test += value
86 | frame_test[key] = value
87 | elif key in train_video:
88 | train += value
89 | frame_train[key] = value
90 | else:
91 | other += value
92 | print(test, train, other)
93 |
94 | print(sum(value for key, value in frames.items()))
95 | print(sorted(frame_train.values())[:20])
96 | print(sorted(frame_test.values())[:20])
97 |
98 | # SequenceLoader(sequence_class=CustomSequence, queue_size=100, num_workers=4, use_multiprocessing=True, do_shuffle=True, data=list(range(5)))
99 |
100 |
101 | """Some Important Notes to understand the conflict between the datafolders and splitfile.txt"""
102 | ##########################
103 | # HandstandPushups/v_HandStandPushups_g01_c01.avi (in actual data)
104 | # HandstandPushups/v_HandStandPushups_g01_c01.avi 37 (in train list) <<<< make me small to work with the frame and processed data on disk
105 | ##########################
106 | # v_HandstandPushups_g01_c01.avi(in frame count dict)
107 | # HandstandPushups_g01_c01 (in valid and train dictionaries)
108 | # v_HandstandPushups_g01_c01 (in processed data)
109 | ##########################
110 | # Training: mini-batch stochastic gradient descent with momentum (set to 0.9). At each iteration, a mini-batch
111 | # of 256 samples is constructed by sampling 256 training videos (uniformly across the classes), from
112 | # each of which a single frame is randomly selected. In spatial net training, a 224 × 224 sub-image is
113 | # randomly cropped from the selected frame; it then undergoes random horizontal flipping and RGB
114 | # jittering. The videos are rescaled beforehand, so that the smallest side of the frame equals 256. We
115 | # note that unlike [15], the sub-image is sampled from the whole frame, not just its 256 × 256 center.
116 | # In the temporal net training, we compute an optical flow volume I for the selected training frame as
117 | # described in Sect. 3. From that volume, a fixed-size 224 × 224 × 2L input is randomly cropped and
118 | # flipped.
119 | ##########################
120 | # Testing. At test time, given a video, we sample a fixed number of frames (25 in our experiments)
121 | # with equal temporal spacing between them. From each of the frames we then obtain 10 ConvNet
122 | # inputs [15] by cropping and flipping four corners and the center of the frame. The class scores for the
123 | # whole video are then obtained by averaging the scores across the sampled frames and crops therein.
124 | ##########################
125 | # v = vertical
126 | # u = horizontal
127 |
--------------------------------------------------------------------------------
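
A short illustration of the split-file parsing and the HandStandPushups/HandstandPushups normalization described in the notes above; the sample line comes from the comment block, and every step mirrors get_video_to_label_mapping:

# sample trainlist entry: class-folder/file plus a numeric class id (trainlist only)
line = "HandstandPushups/v_HandStandPushups_g01_c01.avi 37"

video_name = line.split('/', 1)[1]        # "v_HandStandPushups_g01_c01.avi 37"
video_name = video_name.split(' ', 1)[0]  # drop the trailing class number
video_name = video_name.split('_', 1)[1]  # remove the "v_" prefix
video_name = video_name.split('.', 1)[0]  # remove ".avi"
video_name = video_name.replace("HandStandPushups", "HandstandPushups")
print(video_name)  # HandstandPushups_g01_c01 -- the key used by the frame-count dict and the processed data
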
/frame_dataloader/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | """
6 |
7 | from .motion_dataloader import *
8 | from .spatial_dataloader import *
9 |
10 | from .visual_motion_feature_dataloader import *
11 | from .visual_spatial_feature_dataloader import *
12 |
13 |
14 |
--------------------------------------------------------------------------------
/frame_dataloader/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/frame_dataloader/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/frame_dataloader/__pycache__/motion_dataloader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/frame_dataloader/__pycache__/motion_dataloader.cpython-36.pyc
--------------------------------------------------------------------------------
/frame_dataloader/__pycache__/spatial_dataloader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/frame_dataloader/__pycache__/spatial_dataloader.cpython-36.pyc
--------------------------------------------------------------------------------
/frame_dataloader/__pycache__/visual_motion_feature_dataloader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/frame_dataloader/__pycache__/visual_motion_feature_dataloader.cpython-36.pyc
--------------------------------------------------------------------------------
/frame_dataloader/__pycache__/visual_spatial_feature_dataloader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/frame_dataloader/__pycache__/visual_spatial_feature_dataloader.cpython-36.pyc
--------------------------------------------------------------------------------
/frame_dataloader/helpers.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | This contains:
6 | helper functions for the data loaders and augmentation
7 | (stacking optical-flow frames and building imgaug augmenters of different strengths)
8 | """
9 | import os
10 |
11 | import cv2
12 | import numpy as np
13 | from imgaug import augmenters as iaa
14 | from imgaug import parameters as iap
15 |
16 |
17 | def stack_opticalflow(start_frame_index, video_name, data_root_path, stacked_frames): # returns numpy (h,w,stacked*2) = one sample
18 | """
19 | Stacks "stacked_frames" u/v frames on a single numpy array : (h,w,stacked*2)
20 | """
21 | first_optical_frame_u = cv2.imread(os.path.join(data_root_path, "u", "v_" + video_name, 'frame{}'.format(str(start_frame_index).zfill(6)) + '.jpg'), cv2.IMREAD_GRAYSCALE) # horizontal
22 | first_optical_frame_v = cv2.imread(os.path.join(data_root_path, "v", "v_" + video_name, 'frame{}'.format(str(start_frame_index).zfill(6)) + '.jpg'), cv2.IMREAD_GRAYSCALE) # vertical
23 |
24 | stacked_optical_flow_sample = np.zeros(first_optical_frame_u.shape + (2 * stacked_frames,), dtype=np.uint8) # with channel dimension of stacked_frames(u)+ stacked_frames(v)
25 |
26 | stacked_optical_flow_sample[:, :, 0] = first_optical_frame_u
27 | stacked_optical_flow_sample[:, :, 0 + stacked_frames] = first_optical_frame_v
28 |
29 | for index, optical_frame_id in enumerate(range(start_frame_index + 1, start_frame_index + stacked_frames), 1): # index starts at 1 placed after the first one
30 | stacked_optical_flow_sample[:, :, index] = cv2.imread(os.path.join(data_root_path, "u", "v_" + video_name, 'frame{}'.format(str(optical_frame_id).zfill(6)) + '.jpg'), cv2.IMREAD_GRAYSCALE)
31 | stacked_optical_flow_sample[:, :, index + stacked_frames] = cv2.imread(os.path.join(data_root_path, "v", "v_" + video_name, 'frame{}'.format(str(optical_frame_id).zfill(6)) + '.jpg'), cv2.IMREAD_GRAYSCALE)
32 |
33 | return stacked_optical_flow_sample
34 |
35 |
36 | def get_noise_augmenters(augmenter_level):
37 | """
38 | Gets a list of augmenters for the given noise level
39 | """
40 | # 0 = heavy, 1 = medium, 2 = simple
41 | if augmenter_level == 0:
42 | ####################################################### heavy augmentation #########################################################################
43 | return [iaa.Sometimes(0.9, iaa.Crop(
44 | percent=((iap.Clip(iap.Normal(0, .5), 0, .6),) * 4) # random crops top,right,bottom,left
45 | )),
46 | # some noise
47 | iaa.Sometimes(0.9, [iaa.GaussianBlur(sigma=(0, 0.3)), iaa.Sharpen(alpha=(0.0, .15), lightness=(0.5, 1.5)), iaa.Emboss(alpha=(0.0, 1.0), strength=(0.1, 0.2))]),
48 | iaa.Sometimes(0.9, iaa.Add((-12, 12), per_channel=1))] # rgb jittering
49 | elif augmenter_level == 1:
50 | ####################################################### medium augmentation #######################################################################
51 | return [iaa.Sometimes(0.9, iaa.Crop(percent=((0.0, 0.15), (0.0, 0.15), (0.0, 0.15), (0.0, 0.15)))), # random crops top,right,bottom,left
52 | # some noise
53 | iaa.Sometimes(0.5, [iaa.GaussianBlur(sigma=(0, 0.25)), iaa.Sharpen(alpha=(0.0, .1), lightness=(0.5, 1.25)), iaa.Emboss(alpha=(0.0, 1.0), strength=(0.05, 0.1))]),
54 | iaa.Sometimes(.7, iaa.Add((-10, 10), per_channel=1))] # rgb jittering
55 | elif augmenter_level == 2:
56 | ######################################################## simple augmentation #######################################################################
57 | return [iaa.Sometimes(0.6, iaa.Crop(percent=((0.0, 0.1), (0.0, 0.1), (0.0, 0.1), (0.0, 0.1)))), # random crops top,right,bottom,left
58 | # some noise
59 | iaa.Sometimes(0.35, [iaa.GaussianBlur(sigma=(0, 0.17)), iaa.Sharpen(alpha=(0.0, .07), lightness=(0.35, 1)), iaa.Emboss(alpha=(0.0, .7), strength=(0.1, 0.7))]),
60 | iaa.Sometimes(.45, iaa.Add((-7, 7), per_channel=1))] # rgb jittering
61 | ###################################################################################################################################################
62 |
63 |
64 | def get_validation_augmenter(height, width):
65 | """
66 | for validation we don't add any stochasticity, we just resize the images to height*width
67 | """
68 | aug = iaa.Sequential([
69 | iaa.Scale({"height": height, "width": width})
70 | ])
71 |
72 | return aug
73 |
74 |
75 | def get_training_augmenter(height, width, augmenter_level):
76 | """
77 | Get the training augmenter according to the level of stochasticity required
78 | """
79 | aug = iaa.Sequential([
80 | iaa.Fliplr(0.5), # horizontal flips
81 | *get_noise_augmenters(augmenter_level), # noisy heavy or simple
82 | iaa.Scale({"height": height, "width": width})
83 | ], random_order=True) # apply augmenters in random order
84 |
85 | return aug
86 |
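A small sketch of how these augmenters are typically applied to a batch, mirroring the /255 scaling done in the dataloaders (the dummy images and sizes are illustrative):

import numpy as np
from frame_dataloader.helpers import get_training_augmenter, get_validation_augmenter

batch = [np.random.randint(0, 256, size=(256, 340, 3), dtype=np.uint8) for _ in range(4)]  # fake 256x340 RGB frames

train_aug = get_training_augmenter(height=224, width=224, augmenter_level=2)  # 2 = simple noise level
val_aug = get_validation_augmenter(height=224, width=224)                     # resize only

augmented = np.array(train_aug.augment_images(batch), dtype=np.float32) / 255.0  # same scaling as the loaders
resized = np.array(val_aug.augment_images(batch), dtype=np.float32) / 255.0
print(augmented.shape, resized.shape)  # (4, 224, 224, 3) for both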
--------------------------------------------------------------------------------
/frame_dataloader/motion_dataloader.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Motion dataloader implementing the sequence API from keras (defines how to load a single item)
6 | it loads batches of stacked optical-flow frames; each iteration returns a [batch_size, height, width, stacked_frames*2] ndarray
7 | (2*stacked_frames since we have both u and v optical-flow components)
8 | """
9 | import copy
10 | import random
11 | import sys
12 |
13 | import numpy as np
14 | import tensorflow.keras as keras
15 |
16 | from .UCF_splitting_kernel import *
17 | from .helpers import stack_opticalflow, get_training_augmenter, get_validation_augmenter
18 |
19 |
20 | class MotionSequence(keras.utils.Sequence):
21 | def __init__(self, data_to_load, data_root_path, batch_size, is_training, augmenter, stacked_frames):
22 | """get data structure to load data"""
23 | # list of (video names,frame/max_frame,label)
24 | self.data_to_load = copy.deepcopy(data_to_load)
25 | self.batch_size = batch_size
26 | self.is_training = is_training
27 |
28 | self.augmenter = copy.deepcopy(augmenter)
29 |
30 | self.data_root_path = data_root_path
31 | self.stacked_frames = stacked_frames
32 | self.video_names, self.frames, self.labels = [list(one_of_three_tuples) for one_of_three_tuples in zip(*self.data_to_load)] # three lists
33 |
34 | def __len__(self):
35 | """Denotes the number of batches per epoch"""
36 | return (len(self.video_names) + self.batch_size - 1) // self.batch_size # ceiling div
37 |
38 | def get_actual_length(self):
39 | """Denotes the total number of samples"""
40 | return len(self.video_names)
41 |
42 | def __getitem__(self, batch_start):
43 | """Gets one batch"""
44 | batch_video_names = self.video_names[batch_start * self.batch_size:(batch_start + 1) * self.batch_size]
45 | batch_frames = self.frames[batch_start * self.batch_size:(batch_start + 1) * self.batch_size]
46 | batch_y = np.array(self.labels[batch_start * self.batch_size:(batch_start + 1) * self.batch_size])
47 |
48 | batch_x = [] # could be less or equal batch size
49 |
50 | for vid_id, _ in enumerate(batch_y): # for each sample here
51 |
52 | if self.is_training: # max frame is given
53 | first_optical_frame_id = random.randint(1, batch_frames[vid_id]) # random frame (one based)
54 | else:
55 | first_optical_frame_id = batch_frames[vid_id] # just as selected
56 |
57 | batch_x.append( # append one sample which is (h,w,stacked*2)
58 | stack_opticalflow(start_frame_index=first_optical_frame_id, video_name=batch_video_names[vid_id], data_root_path=self.data_root_path, stacked_frames=self.stacked_frames)
59 | )
60 |
61 | if self.is_training:
62 | return np.array(self.augmenter.augment_images(batch_x), dtype=np.float32) / 255.0, batch_y
63 | else:
64 | # no label needed since (test_video_to_label mapping) (dictionary of name to label) is returned
65 | return batch_video_names, np.array(self.augmenter.augment_images(batch_x), dtype=np.float32) / 255.0
66 |
67 | def shuffle_and_reset(self):
68 | """
69 | new data for the next epoch
70 | """
71 | random.shuffle(self.data_to_load)
72 | self.video_names, self.frames, self.labels = [list(one_of_three_tuples) for one_of_three_tuples in zip(*self.data_to_load)] # shuffle all
73 |
74 |
75 | class MotionDataLoader:
76 | def __init__(self, batch_size, testing_samples_per_video, width, height, augmenter_level=1, log_stream=open("/tmp/null.log", "w"), data_root_path='./tvl1_flow/', ucf_list_path='./UCF_list/', ucf_split='01', stacked_frames=10):
77 | """
78 | get the mapping and initialize the augmenter
79 | """
80 | self.batch_size = batch_size
81 | self.width, self.height = width, height
82 | self.stacked_frames = stacked_frames
83 | self.data_root_path = data_root_path
84 | self.testing_samples_per_video = testing_samples_per_video
85 | self.log_stream = log_stream
86 | # split the training and testing videos
87 | data_util_ = DataUtil(path=ucf_list_path, split=ucf_split)
88 | self.train_video_to_label, self.test_video_to_label = data_util_.get_train_test_video_to_label_mapping() # name without v_ or .avi and small s .. name to numeric label starts at 0
89 |
90 | # get video frames
91 | self.video_frame_count = data_util_.get_video_frame_count() # name without v_ or .avi and small s
92 | self.augmenter_level = augmenter_level
93 |
94 | def run(self):
95 | """
96 | get the data structure for training and validation
97 | """
98 | train_loader = self.get_training_loader()
99 | val_loader = self.get_testing_loader()
100 |
101 | return train_loader, val_loader, self.test_video_to_label
102 |
103 | def get_training_data_structure(self):
104 | """
105 | get the data structure for training
106 | """
107 | training_data_structure = [] # list of (video names,frame/max_frame,label)
108 | for video_name in self.train_video_to_label: # sample from the whole video frames
109 | training_data_structure.append((video_name, self.video_frame_count[video_name] - self.stacked_frames + 1, self.train_video_to_label[video_name])) # we need 10 frames to stack together
110 |
111 | return training_data_structure
112 |
113 | def get_testing_data_structure(self):
114 | """
115 | get the data structure for validation
116 | """
117 | test_data_structure = [] # list of (video names,frame/max_frame,label)
118 | for video_name in self.test_video_to_label:
119 | nb_frame = self.video_frame_count[video_name] - self.stacked_frames + 1 # we need 10 frames to stack together (this will be inclusive)
120 | interval = nb_frame // self.testing_samples_per_video
121 |
122 | if interval == 0: # for videos shorter than self.testing_samples_per_video
123 | interval = 1
124 |
125 | # range is exclusive, so add one to make the end inclusive
126 | # start frames run 1 .. testing_samples_per_video * interval (inclusive)
127 | for frame_idx in range(1, min(self.testing_samples_per_video * interval, nb_frame) + 1, interval):
128 | test_data_structure.append((video_name, frame_idx, self.test_video_to_label[video_name]))
129 |
130 | return test_data_structure
131 |
132 | def get_training_loader(self):
133 | """
134 | an instance of sequence loader for motion model for parallel dataloading using keras sequence
135 | """
136 | loader = MotionSequence(data_to_load=self.get_training_data_structure(),
137 | data_root_path=self.data_root_path,
138 | batch_size=self.batch_size,
139 | is_training=True,
140 | augmenter=get_training_augmenter(height=self.height, width=self.width, augmenter_level=self.augmenter_level),
141 | stacked_frames=self.stacked_frames
142 | )
143 |
144 | print('==> Training data :', len(loader.data_to_load), 'videos', file=self.log_stream)
145 | print('==> Training data :', len(loader.data_to_load), 'videos')
146 | return loader
147 |
148 | def get_testing_loader(self):
149 | """
150 | an instance of sequence loader for motion model for parallel dataloading using keras sequence
151 | """
152 | loader = MotionSequence(data_to_load=self.get_testing_data_structure(),
153 | data_root_path=self.data_root_path,
154 | batch_size=self.batch_size,
155 | is_training=False,
156 | augmenter=get_validation_augmenter(height=self.height, width=self.width),
157 | stacked_frames=self.stacked_frames)
158 |
159 | print('==> Validation data :', len(loader.data_to_load), 'frames', file=self.log_stream)
160 | print('==> Validation data :', len(loader.data_to_load), 'frames')
161 | return loader
162 |
163 |
164 | if __name__ == '__main__':
165 | data_loader = MotionDataLoader(batch_size=64,
166 | testing_samples_per_video=19, width=224, height=224, log_stream=sys.stdout, augmenter_level=1)
167 | train_loader, test_loader, test_video_level_label = data_loader.run()
168 |
169 | print(len(train_loader))
170 | print(len(test_loader))
171 |
172 | print(train_loader.get_actual_length())
173 | print(test_loader.get_actual_length())
174 |
175 | print(train_loader[0][0].shape, train_loader[0][1])
176 |
177 | # import tqdm
178 | # progress = tqdm.tqdm(train_loader.get_epoch_generator(), total=len(train_loader))
179 |
180 | # for (sampled_frame, label) in progress:
181 | # pass
182 |
183 | import matplotlib.pyplot as plt
184 |
185 |
186 | # preview raw data
187 | def preview(data, labels):
188 | # 3 channels
189 | fig, axeslist = plt.subplots(ncols=8, nrows=8, figsize=(10, 10))
190 |
191 | for i, sample in enumerate(data):
192 | axeslist.ravel()[i].imshow(data[i], cmap='gray')
193 | axeslist.ravel()[i].set_title(labels[i])
194 | axeslist.ravel()[i].set_axis_off()
195 |
196 | plt.subplots_adjust(wspace=.4, hspace=.4)
197 |
198 |
199 | for batch in (test_loader[i] for i in range(len(test_loader))):  # test batches are (video_names, frames)
200 | print(batch[0], batch[1].shape)
201 | preview(batch[1][:, :, :, 0], batch[0])
202 | preview(batch[1][:, :, :, 1], batch[0])
203 | preview(batch[1][:, :, :, 2], batch[0])
204 | preview(batch[1][:, :, :, 3], batch[0])
205 |
206 | preview(batch[1][:, :, :, 10], batch[0])
207 | preview(batch[1][:, :, :, 11], batch[0])
208 | preview(batch[1][:, :, :, 12], batch[0])
209 | preview(batch[1][:, :, :, 13], batch[0])
210 | break
211 |
212 | for batch in (train_loader[i] for i in range(len(train_loader))):  # training batches are (frames, labels)
213 | print(batch[0].shape, batch[1].shape)
214 | preview(batch[0][:, :, :, 0], batch[1])
215 | preview(batch[0][:, :, :, 1], batch[1])
216 | preview(batch[0][:, :, :, 2], batch[1])
217 | preview(batch[0][:, :, :, 3], batch[1])
218 |
219 | preview(batch[0][:, :, :, 10], batch[1])
220 | preview(batch[0][:, :, :, 11], batch[1])
221 | preview(batch[0][:, :, :, 12], batch[1])
222 | preview(batch[0][:, :, :, 13], batch[1])
223 | break
224 |
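The equal-interval test sampling in get_testing_data_structure can be checked in isolation; a small worked example (the frame count is made up):

stacked_frames = 10
testing_samples_per_video = 19
nb_frame = 100 - stacked_frames + 1                      # e.g. a 100-frame video leaves 91 valid start positions
interval = max(nb_frame // testing_samples_per_video, 1)
start_indices = list(range(1, min(testing_samples_per_video * interval, nb_frame) + 1, interval))
print(len(start_indices), start_indices[:5])             # 19 start frames spaced `interval` apart: [1, 5, 9, 13, 17]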
--------------------------------------------------------------------------------
/frame_dataloader/spatial_dataloader.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Spatial dataloader implementing the sequence API from keras (defines how to load a single item)
6 | it loads batches of RGB frames; each iteration returns a [batch_size, height, width, 3] ndarray
7 | """
8 | import copy
9 | import random
10 |
11 | import cv2
12 | import numpy as np
13 | import tensorflow.keras as keras
14 |
15 | from .UCF_splitting_kernel import *
16 | from .helpers import get_training_augmenter, get_validation_augmenter
17 |
18 |
19 | class SpatialSequence(keras.utils.Sequence):
20 | def __init__(self, data_to_load, data_root_path, batch_size, is_training, augmenter):
21 | """get data structure to load data"""
22 | # list of (video names,frame/max_frame,label)
23 | self.data_to_load = copy.deepcopy(data_to_load)
24 | self.batch_size = batch_size
25 | self.is_training = is_training
26 |
27 | self.augmenter = copy.deepcopy(augmenter)
28 |
29 | self.data_root_path = data_root_path
30 |
31 | self.video_names, self.frames, self.labels = [list(one_of_three_tuples) for one_of_three_tuples in zip(*self.data_to_load)] # three lists
32 |
33 | def __len__(self):
34 | """Denotes the number of batches per epoch"""
35 | return (len(self.video_names) + self.batch_size - 1) // self.batch_size # ceiling div
36 |
37 | def get_actual_length(self):
38 | """Denotes the total number of samples"""
39 | return len(self.video_names)
40 |
41 | def __getitem__(self, batch_start):
42 | """Gets one batch"""
43 | batch_video_names = self.video_names[batch_start * self.batch_size:(batch_start + 1) * self.batch_size]
44 | batch_frames = self.frames[batch_start * self.batch_size:(batch_start + 1) * self.batch_size]
45 | batch_y = np.array(self.labels[batch_start * self.batch_size:(batch_start + 1) * self.batch_size])
46 |
47 | batch_x = [] # could be less or equal batch size
48 | #
49 | for vid_id, _ in enumerate(batch_y):
50 | if self.is_training: # max frame is given
51 | frame_id = random.randint(1, batch_frames[vid_id]) # random frame (one based)
52 | else:
53 | frame_id = batch_frames[vid_id] # just as selected
54 |
55 | batch_x.append(
56 | cv2.cvtColor(cv2.imread(os.path.join(self.data_root_path, "v_" + batch_video_names[vid_id], 'frame{}'.format(str(frame_id).zfill(6)) + '.jpg')), cv2.COLOR_BGR2RGB)
57 | )
58 |
59 | if self.is_training:
60 | return np.array(self.augmenter.augment_images(batch_x), dtype=np.float32) / 255.0, batch_y
61 | else:
62 | # no label needed since (test_video_to_label mapping) (dictionary of name to label) is returned
63 | return batch_video_names, np.array(self.augmenter.augment_images(batch_x), dtype=np.float32) / 255.0
64 |
65 | def shuffle_and_reset(self):
66 | """
67 | new data for the next epoch
68 | """
69 | random.shuffle(self.data_to_load)
70 | self.video_names, self.frames, self.labels = [list(one_of_three_tuples) for one_of_three_tuples in zip(*self.data_to_load)] # shuffle all
71 |
72 |
73 | class SpatialDataLoader:
74 | def __init__(self, batch_size, testing_samples_per_video, width, height, log_stream=open("/tmp/null.log", "w"), augmenter_level=1, data_root_path='./jpegs_256/', ucf_list_path='./UCF_list/', ucf_split='01'):
75 | """
76 | get the mapping and initialize the augmenter
77 | """
78 | self.batch_size = batch_size
79 | self.width, self.height = width, height
80 | self.data_root_path = data_root_path
81 | self.testing_samples_per_video = testing_samples_per_video
82 | self.log_stream = log_stream
83 | # split the training and testing videos
84 | data_util_ = DataUtil(path=ucf_list_path, split=ucf_split)
85 | self.train_video_to_label, self.test_video_to_label = data_util_.get_train_test_video_to_label_mapping() # name without v_ or .avi and small s .. name to numeric label starts at 0
86 |
87 | # get video frames
88 | self.video_frame_count = data_util_.get_video_frame_count() # name without v_ or .avi and small s
89 |
90 | self.augmenter_level = augmenter_level
91 |
92 | def run(self):
93 | """
94 | get the data structure for training and validation
95 | """
96 | train_loader = self.get_training_loader()
97 | val_loader = self.get_testing_loader()
98 |
99 | return train_loader, val_loader, self.test_video_to_label
100 |
101 | def get_training_data_structure(self):
102 | """
103 | get the data structure for training
104 | """
105 | training_data_structure = [] # list of (video names,frame/max_frame,label)
106 | for video_name in self.train_video_to_label: # sample from the whole video frames
107 | training_data_structure.append((video_name, self.video_frame_count[video_name], self.train_video_to_label[video_name]))
108 |
109 | return training_data_structure
110 |
111 | def get_testing_data_structure(self):
112 | """
113 | get the data structure for validation
114 | """
115 | test_data_structure = [] # list of (video names,frame/max_frame,label)
116 | for video_name in self.test_video_to_label:
117 | nb_frame = self.video_frame_count[video_name]
118 | interval = nb_frame // self.testing_samples_per_video
119 |
120 | if interval == 0: # for videos shorter than self.testing_samples_per_video
121 | interval = 1
122 |
123 | # range is exclusive, so add one to make the end inclusive
124 | # sampled frames run 1 .. testing_samples_per_video * interval (inclusive)
125 | for frame_idx in range(1, min(self.testing_samples_per_video * interval, nb_frame) + 1, interval):
126 | test_data_structure.append((video_name, frame_idx, self.test_video_to_label[video_name]))
127 |
128 | return test_data_structure
129 |
130 | def get_training_loader(self):
131 | """
132 | an instance of sequence loader for spatial model for parallel dataloading using keras sequence
133 | """
134 | loader = SpatialSequence(data_to_load=self.get_training_data_structure(),
135 | data_root_path=self.data_root_path,
136 | batch_size=self.batch_size,
137 | is_training=True,
138 | augmenter=get_training_augmenter(height=self.height, width=self.width, augmenter_level=self.augmenter_level),
139 | )
140 |
141 | print('==> Training data :', len(loader.data_to_load), 'videos', file=self.log_stream)
142 | print('==> Training data :', len(loader.data_to_load), 'videos')
143 | return loader
144 |
145 | def get_testing_loader(self):
146 | """
147 | an instance of sequence loader for spatial model for parallel dataloading using keras sequence
148 | """
149 |
150 | loader = SpatialSequence(data_to_load=self.get_testing_data_structure(),
151 | data_root_path=self.data_root_path,
152 | batch_size=self.batch_size,
153 | is_training=False,
154 | augmenter=get_validation_augmenter(height=self.height, width=self.width),
155 | )
156 |
157 | print('==> Validation data :', len(loader.data_to_load), 'frames', file=self.log_stream)
158 | print('==> Validation data :', len(loader.data_to_load), 'frames')
159 | return loader
160 |
161 |
162 | if __name__ == '__main__':
163 | data_loader = SpatialDataLoader(batch_size=64,  # data_root_path="data",
164 | ucf_split='01',
165 | testing_samples_per_video=19, width=224, height=224)
166 | train_loader, test_loader, test_video_level_label = data_loader.run()
167 |
168 | print(len(train_loader))
169 | print(len(test_loader))
170 |
171 | print(train_loader.get_actual_length())
172 | print(test_loader.get_actual_length())
173 |
174 | # train_loader is itself the keras Sequence, so it can be indexed directly
175 | print(train_loader[0][0].shape, train_loader[0][1].shape)
176 | # import tqdm
177 | # progress = tqdm.tqdm(train_loader.get_epoch_generator(), total=len(train_loader))
178 |
179 | # for (sampled_frame, label) in progress:
180 | # pass
181 |
182 | import matplotlib.pyplot as plt
183 |
184 |
185 | # preview raw data
186 | def preview(data, labels):
187 | # 3 channels
188 | fig, axeslist = plt.subplots(ncols=8, nrows=8, figsize=(10, 10))
189 |
190 | for i, sample in enumerate(data):
191 | axeslist.ravel()[i].imshow(data[i])
192 | axeslist.ravel()[i].set_title(labels[i])
193 | axeslist.ravel()[i].set_axis_off()
194 |
195 | plt.subplots_adjust(wspace=.4, hspace=.4)
196 |
197 |
198 | print("train sample")
199 | for batch in (train_loader[i] for i in range(len(train_loader))):  # training batches are (frames, labels)
200 | print(batch[0].shape, batch[1].shape)
201 | print(batch[1])
202 | preview(batch[0], batch[1])
203 |
204 | break
205 | print("test sample") # same name will be displayed testing_samples_per_video with no shuffling
206 | for batch in test_loader.get_epoch_generator():
207 | print(batch[1].shape, batch[2].shape)
208 | print(batch[0], batch[2])
209 | preview(batch[1], batch[2])
210 |
211 | break
212 |
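For reference, the spatial loader reads individual frames with the path layout below (a sketch; the video name is a made-up example, the root is the class default):

import os

data_root_path = "./jpegs_256/"           # default data_root_path of SpatialDataLoader
video_name = "SomeAction_g01_c01"         # names are stored without the "v_" prefix or the ".avi" suffix
frame_id = 17
frame_path = os.path.join(data_root_path, "v_" + video_name, "frame{:06d}.jpg".format(frame_id))
print(frame_path)                          # ./jpegs_256/v_SomeAction_g01_c01/frame000017.jpg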
--------------------------------------------------------------------------------
/frame_dataloader/visual_motion_feature_dataloader.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Motion visual-feature dataloader implementing the sequence API from keras (defines how to load a single item)
6 | we sample "samples_per_video" frames per video, at equal intervals for validation or randomly for training
7 | each iteration loads the stacked frames of one whole video and returns a [samples_per_video, height, width, stacked_frames*2] ndarray
8 | (2*stacked_frames since we have both u and v optical-flow components)
9 | """
10 | import random
11 | import sys
12 |
13 | import numpy as np
14 | import tensorflow.keras as keras
15 |
16 | from .UCF_splitting_kernel import *
17 | from .helpers import get_training_augmenter, get_validation_augmenter, stack_opticalflow
18 |
19 |
20 | class MotionSequenceFeature(keras.utils.Sequence):
21 | def __init__(self, data_to_load, data_root_path, samples_per_video, is_training, augmenter, stacked_frames):
22 | """get data structure to load data"""
23 | # list of (video names,[frame]/max_frame,label)
24 | self.data_to_load = data_to_load
25 | self.samples_per_video = samples_per_video
26 | self.is_training = is_training
27 |
28 | self.augmenter = augmenter
29 |
30 | self.data_root_path = data_root_path
31 | self.stacked_frames = stacked_frames
32 | self.video_names, self.frames, self.labels = [list(one_of_three_tuples) for one_of_three_tuples in zip(*self.data_to_load)] # three lists
33 |
34 | def __len__(self):
35 | """Denotes the number of batches per epoch"""
36 | return len(self.video_names)
37 |
38 | def get_actual_length(self):
39 | """Denotes the total number of samples"""
40 | return len(self)
41 |
42 | def __getitem__(self, video_index):
43 | """Gets one sample""" # (samples_per_video,h,w,stacked*2)
44 | video_label = self.labels[video_index]
45 | video_name = self.video_names[video_index]
46 |
47 | # start index of each stack: we take samples_per_video (e.g. 19) samples, each sample being stacked_frames u/v pairs (e.g. 20 channels)
48 | if self.is_training: # max frame is given
49 | video_frames_start_idx = sorted(random.sample(range(1, self.frames[video_index] + 1), self.samples_per_video)) # sample random frames (samples_per_video) and sort them
50 | else:
51 | video_frames_start_idx = self.frames[video_index] # just as selected list of samples_per_video
52 |
53 | video_frames = [] # could be less or equal batch size
54 |
55 | for video_frame_start_idx in video_frames_start_idx: # for each sample here
56 | video_frames.append(stack_opticalflow(start_frame_index=video_frame_start_idx, video_name=video_name, data_root_path=self.data_root_path, stacked_frames=self.stacked_frames)) # append one sample which is (h,w,stacked*2)
57 |
58 | return np.array(self.augmenter.augment_images(video_frames), dtype=np.float32) / 255.0, video_label
59 |
60 | def shuffle_and_reset(self):
61 | """
62 | new data for the next epoch
63 | """
64 | random.shuffle(self.data_to_load)
65 | self.video_names, self.frames, self.labels = [list(one_of_three_tuples) for one_of_three_tuples in zip(*self.data_to_load)] # shuffle all
66 |
67 |
68 | class MotionDataLoaderVisualFeature:
69 | def __init__(self, samples_per_video, width, height, num_workers, use_multiprocessing, augmenter_level=0, log_stream=open("/tmp/null.log", "w"), data_root_path='./tvl1_flow/', ucf_list_path='./UCF_list/', ucf_split='01', queue_size=10, stacked_frames=10):
70 | """
71 | get the mapping and initialize the augmenter
72 | """
73 | self.samples_per_video = samples_per_video
74 | self.use_multiprocessing = use_multiprocessing
75 | self.queue_size = queue_size
76 | self.num_workers = num_workers
77 |
78 | self.width, self.height = width, height
79 | self.stacked_frames = stacked_frames
80 | self.data_root_path = data_root_path
81 | self.log_stream = log_stream
82 | # split the training and testing videos
83 | data_util_ = DataUtil(path=ucf_list_path, split=ucf_split)
84 | self.train_video_to_label, self.test_video_to_label = data_util_.get_train_test_video_to_label_mapping() # name without v_ or .avi and small s .. name to numeric label starts at 0
85 |
86 | # get video frames
87 | self.video_frame_count = data_util_.get_video_frame_count() # name without v_ or .avi and small s
88 | self.augmenter_level = augmenter_level
89 |
90 | def run(self):
91 | """
92 | get the data structure for training and validation
93 | """
94 | train_loader = self.get_training_loader()
95 | val_loader = self.get_testing_loader()
96 |
97 | return train_loader, val_loader
98 |
99 | def get_training_data_structure(self):
100 | """
101 | get the data structure for training
102 | """
103 | training_data_structure = [] # list of (video names,[frame]/max_frame,label)
104 | for video_name in self.train_video_to_label: # sample from the whole video frames
105 | training_data_structure.append((video_name, self.video_frame_count[video_name] - self.stacked_frames + 1, self.train_video_to_label[video_name])) # we need 10 frames to stack together
106 |
107 | return training_data_structure
108 |
109 | def get_testing_data_structure(self):
110 | """
111 | get the data structure for validation
112 | """
113 | test_data_structure = [] # list of (video names,[frame]/max_frame,label)
114 | for video_name in self.test_video_to_label:
115 | nb_frame = self.video_frame_count[video_name] - self.stacked_frames + 1 # we need 10 frames to stack together (this will be inclusive)
116 | interval = nb_frame // self.samples_per_video
117 |
118 | if interval == 0: # for videos shorter than self.samples_per_video
119 | interval = 1
120 |
121 | # range is exclusive, so add one to make the end inclusive
122 | # sampled frames run 1 .. samples_per_video * interval (inclusive)
123 | sampled_frames = []
124 | for frame_idx in range(1, min(self.samples_per_video * interval, nb_frame) + 1, interval):
125 | sampled_frames.append(frame_idx)
126 |
127 | test_data_structure.append((video_name, sampled_frames, self.test_video_to_label[video_name]))
128 |
129 | return test_data_structure
130 |
131 | def get_training_loader(self):
132 | """
133 | an instance of sequence loader for motion model for parallel dataloading using keras sequence
134 | """
135 | loader = MotionSequenceFeature(data_to_load=self.get_training_data_structure(),
136 | data_root_path=self.data_root_path,
137 | samples_per_video=self.samples_per_video,
138 | is_training=True,
139 | augmenter=get_training_augmenter(height=self.height, width=self.width, augmenter_level=self.augmenter_level),
140 | stacked_frames=self.stacked_frames)
141 |
142 | print('==> Training data :', len(loader.data_to_load), 'videos', file=self.log_stream)
143 | print('==> Training data :', len(loader.data_to_load), 'videos')
144 | return loader
145 |
146 | def get_testing_loader(self):
147 | """
148 | an instance of sequence loader for motion model for parallel dataloading using keras sequence
149 | """
150 | loader = MotionSequenceFeature( data_to_load=self.get_testing_data_structure(),
151 | data_root_path=self.data_root_path,
152 | samples_per_video=self.samples_per_video,
153 | is_training=False,
154 | augmenter=get_validation_augmenter(height=self.height, width=self.width),
155 | stacked_frames=self.stacked_frames
156 | )
157 | print('==> Validation data :', len(loader.data_to_load), 'videos', file=self.log_stream)
158 | print('==> Validation data :', len(loader.data_to_load), 'videos')
159 | return loader
160 |
161 |
162 | if __name__ == '__main__':
163 | data_loader = MotionDataLoaderVisualFeature(samples_per_video=19, use_multiprocessing=True, ucf_list_path='../UCF_list/',
164 | width=224, height=224, num_workers=1, log_stream=sys.stdout, augmenter_level=1)
165 | train_loader, test_loader = data_loader.run()
166 |
167 | print(len(train_loader))
168 | print(len(test_loader))
169 |
170 | print(train_loader.get_actual_length())
171 | print(test_loader.get_actual_length())
172 |
173 | print(train_loader[0][0].shape, train_loader[0][1])
174 |
175 | # import tqdm
176 | # progress = tqdm.tqdm(train_loader.get_epoch_generator(), total=len(train_loader))
177 |
178 | # for (sampled_frame, label) in progress:
179 | # pass
180 |
181 | import matplotlib.pyplot as plt
182 |
183 |
184 | # preview raw data
185 | def preview(data, labels):
186 | # 3 channels
187 | fig, axeslist = plt.subplots(ncols=8, nrows=8, figsize=(10, 10))
188 |
189 | for i, sample in enumerate(data):
190 | axeslist.ravel()[i].imshow(data[i], cmap='gray')
191 | axeslist.ravel()[i].set_title(labels[i])
192 | axeslist.ravel()[i].set_axis_off()
193 |
194 | plt.subplots_adjust(wspace=.4, hspace=.4)
195 |
196 |
197 | for video_frames, video_label in (test_loader[i] for i in range(len(test_loader))):  # each item is (frames, label) for one whole video
198 | print(video_frames.shape, video_label)
199 | preview(video_frames[:, :, :, 0], [video_label] * len(video_frames))
200 | preview(video_frames[:, :, :, 1], [video_label] * len(video_frames))
201 | preview(video_frames[:, :, :, 2], [video_label] * len(video_frames))
202 | preview(video_frames[:, :, :, 3], [video_label] * len(video_frames))
203 |
204 | preview(video_frames[:, :, :, 10], [video_label] * len(video_frames))
205 | preview(video_frames[:, :, :, 11], [video_label] * len(video_frames))
206 | preview(video_frames[:, :, :, 12], [video_label] * len(video_frames))
207 | preview(video_frames[:, :, :, 13], [video_label] * len(video_frames))
208 | break
209 |
210 | for video_frames, video_label in (train_loader[i] for i in range(len(train_loader))):  # each item is (frames, label) for one whole video
211 | print(video_frames.shape, video_label)
212 | preview(video_frames[:, :, :, 0], [video_label] * len(video_frames))
213 | preview(video_frames[:, :, :, 1], [video_label] * len(video_frames))
214 | preview(video_frames[:, :, :, 2], [video_label] * len(video_frames))
215 | preview(video_frames[:, :, :, 3], [video_label] * len(video_frames))
216 |
217 | preview(video_frames[:, :, :, 10], [video_label] * len(video_frames))
218 | preview(video_frames[:, :, :, 11], [video_label] * len(video_frames))
219 | preview(video_frames[:, :, :, 12], [video_label] * len(video_frames))
220 | preview(video_frames[:, :, :, 13], [video_label] * len(video_frames))
221 | break
222 |
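The num_workers / use_multiprocessing / queue_size arguments stored by this class suggest the sequences are meant to be consumed through a parallel enqueuer. One hedged way to do that with stock Keras (the helper name is mine, and this is not necessarily how the trainers in this project consume the sequences):

import tensorflow.keras as keras


def iterate_in_parallel(sequence, workers=4, use_multiprocessing=True, queue_size=10):
    """Yield the items of a keras Sequence using background workers."""
    enqueuer = keras.utils.OrderedEnqueuer(sequence, use_multiprocessing=use_multiprocessing)
    enqueuer.start(workers=workers, max_queue_size=queue_size)
    generator = enqueuer.get()
    try:
        for _ in range(len(sequence)):
            yield next(generator)
    finally:
        enqueuer.stop()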
--------------------------------------------------------------------------------
/frame_dataloader/visual_spatial_feature_dataloader.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Spatial visual-feature dataloader implementing the sequence API from keras (defines how to load a single item)
6 | we sample "samples_per_video" frames per video, at equal intervals for validation or randomly for training
7 | each iteration loads the frames of one whole video and returns a [samples_per_video, height, width, 3] ndarray
8 | """
9 | import random
10 |
11 | import cv2
12 | import numpy as np
13 | import tensorflow.keras as keras
14 |
15 | from .UCF_splitting_kernel import *
16 | from .helpers import get_training_augmenter, get_validation_augmenter
17 |
18 |
19 | class SpatialSequenceFeature(keras.utils.Sequence):
20 | def __init__(self, data_to_load, data_root_path, samples_per_video, is_training, augmenter):
21 | """get data structure to load data"""
22 | # list of (video names,[frame]/max_frame,label)
23 | self.data_to_load = data_to_load
24 | self.samples_per_video = samples_per_video
25 | self.is_training = is_training
26 |
27 | self.augmenter = augmenter
28 |
29 | self.data_root_path = data_root_path
30 |
31 | self.video_names, self.frames, self.labels = [list(one_of_three_tuples) for one_of_three_tuples in zip(*self.data_to_load)] # three lists
32 |
33 | def __len__(self):
34 | """Denotes the number of batches per epoch"""
35 | return len(self.video_names)
36 |
37 | def get_actual_length(self):
38 | """Denotes the total number of samples"""
39 | return len(self)
40 |
41 | def __getitem__(self, video_index):
42 | """Gets one batch"""
43 | video_label = self.labels[video_index]
44 | video_name = self.video_names[video_index]
45 |
46 | if self.is_training: # max frame is given
47 | video_frames_idx = sorted(random.sample(range(1, self.frames[video_index] + 1), self.samples_per_video)) # sample random frames (samples_per_video) and sort them
48 | else:
49 | video_frames_idx = self.frames[video_index] # just as selected list of samples_per_video
50 |
51 | video_frames = [] # could be less or equal batch size
52 |
53 | for video_frame_id in video_frames_idx: # for each sample here
54 | video_frames.append(
55 | cv2.cvtColor(cv2.imread(os.path.join(self.data_root_path, "v_" + video_name, 'frame{}'.format(str(video_frame_id).zfill(6)) + '.jpg')), cv2.COLOR_BGR2RGB)
56 | )
57 |
58 | return np.array(self.augmenter.augment_images(video_frames), dtype=np.float32) / 255.0, video_label
59 |
60 | def shuffle_and_reset(self):
61 | """
62 | new data for the next epoch
63 | """
64 | random.shuffle(self.data_to_load)
65 | self.video_names, self.frames, self.labels = [list(one_of_three_tuples) for one_of_three_tuples in zip(*self.data_to_load)] # shuffle all
66 |
67 |
68 | class SpatialDataLoaderFeature:
69 | def __init__(self, samples_per_video, width, height, num_workers, use_multiprocessing, log_stream=open("/tmp/null.log", "w"), augmenter_level=0, data_root_path='./jpegs_256/', ucf_list_path='./UCF_list/', ucf_split='01', queue_size=10):
70 | """
71 | get the mapping and initialize the augmenter
72 | """
73 | self.samples_per_video = samples_per_video
74 | self.use_multiprocessing = use_multiprocessing
75 | self.queue_size = queue_size
76 | self.num_workers = num_workers
77 |
78 | self.width, self.height = width, height
79 | self.data_root_path = data_root_path
80 |
81 | self.log_stream = log_stream
82 | # split the training and testing videos
83 | data_util_ = DataUtil(path=ucf_list_path, split=ucf_split)
84 | self.train_video_to_label, self.test_video_to_label = data_util_.get_train_test_video_to_label_mapping() # name without v_ or .avi and small s .. name to numeric label starts at 0
85 |
86 | # get video frames
87 | self.video_frame_count = data_util_.get_video_frame_count() # name without v_ or .avi and small s
88 |
89 | self.augmenter_level = augmenter_level
90 |
91 | def run(self):
92 | """
93 | get the data structure for training and validation
94 | """
95 | train_loader = self.get_training_loader()
96 | val_loader = self.get_testing_loader()
97 |
98 | return train_loader, val_loader
99 |
100 | def get_training_data_structure(self):
101 | """
102 | get the data structure for training
103 | """
104 | training_data_structure = [] # list of (video names,[frame]/max_frame,label)
105 | for video_name in self.train_video_to_label: # sample from the whole video frames
106 | training_data_structure.append((video_name, self.video_frame_count[video_name], self.train_video_to_label[video_name]))
107 |
108 | return training_data_structure
109 |
110 | def get_testing_data_structure(self):
111 | """
112 | get the data structure for validation
113 | """
114 | test_data_structure = [] # list of (video names,[frame]/max_frame,label)
115 | for video_name in self.test_video_to_label:
116 | nb_frame = self.video_frame_count[video_name]
117 | interval = nb_frame // self.samples_per_video
118 |
119 | if interval == 0: # for videos shorter than self.samples_per_video
120 | interval = 1
121 |
122 | # range is exclusive, so add one to make the end inclusive
123 | # sampled frames run 1 .. samples_per_video * interval (inclusive)
124 | sampled_frames = []
125 | for frame_idx in range(1, min(self.samples_per_video * interval, nb_frame) + 1, interval):
126 | sampled_frames.append(frame_idx)
127 |
128 | test_data_structure.append((video_name, sampled_frames, self.test_video_to_label[video_name]))
129 |
130 | return test_data_structure
131 |
132 | def get_training_loader(self):
133 | """
134 | an instance of sequence loader for the spatial model for parallel dataloading using keras sequence
135 | """
136 | loader = SpatialSequenceFeature(data_to_load=self.get_training_data_structure(),
137 | data_root_path=self.data_root_path,
138 | samples_per_video=self.samples_per_video,
139 | is_training=True,
140 | augmenter=get_training_augmenter(height=self.height, width=self.width, augmenter_level=self.augmenter_level),
141 | )
142 |
143 | print('==> Training data :', len(loader.data_to_load), 'videos', file=self.log_stream)
144 | print('==> Training data :', len(loader.data_to_load), 'videos')
145 | return loader
146 |
147 | def get_testing_loader(self):
148 | """
149 | an instance of sequence loader for the spatial model for parallel dataloading using keras sequence
150 | """
151 |
152 | loader = SpatialSequenceFeature(data_to_load=self.get_testing_data_structure(),
153 | data_root_path=self.data_root_path,
154 | samples_per_video=self.samples_per_video,
155 | is_training=False,
156 | augmenter=get_validation_augmenter(height=self.height, width=self.width),
157 | )
158 |
159 | print('==> Validation data :', len(loader.data_to_load), 'videos', file=self.log_stream)
160 | print('==> Validation data :', len(loader.data_to_load), 'videos')
161 | return loader
162 |
163 |
164 | if __name__ == '__main__':
165 | data_loader = SpatialDataLoaderFeature(samples_per_video=19, use_multiprocessing=True, # data_root_path="data",
166 | ucf_split='01', ucf_list_path='../UCF_list/',
167 | width=299, height=299, num_workers=2)
168 | train_loader, test_loader = data_loader.run()
169 |
170 | print(len(train_loader))
171 | print(len(test_loader))
172 |
173 | print(train_loader.get_actual_length())
174 | print(test_loader.get_actual_length())
175 |
176 | # train_loader is itself the keras Sequence, so it can be indexed directly; the second element is a scalar label
177 | print(train_loader[0][0].shape, train_loader[0][1])
178 | # import tqdm
179 | # progress = tqdm.tqdm(train_loader.get_epoch_generator(), total=len(train_loader))
180 |
181 | # for (sampled_frame, label) in progress:
182 | # pass
183 |
184 | import matplotlib.pyplot as plt
185 |
186 |
187 | # preview raw data
188 | def preview(data, labels):
189 | # 3 channels
190 | fig, axeslist = plt.subplots(ncols=8, nrows=8, figsize=(10, 10))
191 |
192 | for i, sample in enumerate(data):
193 | axeslist.ravel()[i].imshow(data[i])
194 | axeslist.ravel()[i].set_title(labels[i])
195 | axeslist.ravel()[i].set_axis_off()
196 |
197 | plt.subplots_adjust(wspace=.4, hspace=.4)
198 |
199 |
200 | print("train sample")
201 | for video_frames, video_label in (train_loader[i] for i in range(len(train_loader))):  # each item is (frames, label) for one whole video
202 | print(video_frames.shape, video_label)
203 | print(video_label)
204 | preview(video_frames, [video_label] * len(video_frames))
205 |
206 | break
207 | print("test sample") # same name will be displayed testing_samples_per_video with no shuffling
208 | for batch in test_loader.get_epoch_generator():
209 | print(batch[1].shape, batch[2].shape)
210 | print(batch[0], batch[2])
211 | preview(batch[1], batch[2])
212 |
213 | break
214 |
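Each item produced by these feature sequences describes one whole video rather than a mini-batch of frames; a quick shape check using the loaders built in the __main__ above:

video_frames, video_label = train_loader[0]
print(video_frames.shape, video_label)   # (19, 299, 299, 3) for this spatial feature loader
# the motion counterpart yields (samples_per_video, height, width, 2*stacked_frames), e.g. (19, 224, 224, 20)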
--------------------------------------------------------------------------------
/generate_motion_feature_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Here I'm generating visual features from my pretrained motion stream, which has 84% top-1 accuracy
6 | these visual features come from the layer just below the softmax prediction; that layer has 2048 features for each input image
7 | the generated data is stored in a big list and then dumped as a pickle file for each epoch of data (more data augmentation)
8 | this data is then fed into a recurrent network implemented in recurrent_fusion_trainer.py to train a video-level classifier instead of a frame-level classifier
9 |
10 | I expect a pre-trained xception model here to be downloaded from drive
11 | -------------------
12 | In this file I do what I call model surgery, which means removing or adding some layers of a model
13 | Here I load my trained model whose architecture is
14 |
15 | Input_image > batch_norm >> xception model as layer >>> softmax layer of 101 classes
16 |
17 |
18 | so I can change the model a little bit and make it have 2 outputs which are the features just below the softmax and the softmax
19 | so the model becomes
20 |
21 | Input_image > batch_norm >> xception model as layer >>> softmax layer of 101 classes
22 | # >>> feature field of 2048 features
23 |
24 | which are two outputs now
25 | """
26 | import pickle
27 |
28 | from tensorflow.python.keras import Model, Input
29 |
30 | import frame_dataloader
31 | from evaluation import legacy_load_model
32 | from evaluation.evaluation import *
33 | from utils.drive_manager import DriveManager
34 |
35 | #####################################################
36 | feature_field_size = 2048
37 | testing_samples_per_video = 19
38 | #####################################################
39 | """Managed"""
40 | evaluate = False
41 | generate_test = False
42 |
43 | drive_manager = DriveManager("motion_feature_dataset")
44 | drive_manager.download_file('1O8OM6Q01az_71HdMQmWM3op1qJhfsQoI', "motion.zip") # the id of the zip file contains my network
45 |
46 | motion_model_restored = legacy_load_model(filepath="motion.h5", custom_objects={'sparse_categorical_cross_entropy_loss': sparse_categorical_cross_entropy_loss, "acc_top_1": acc_top_1, "acc_top_5": acc_top_5})
47 | motion_model_restored.summary()
48 | # xception here is a layer
49 | # The architecture summary is
50 | # input_image > batch_norm > xception layer
51 | xception_rebuilt = Model(
52 | motion_model_restored.layers[-1].layers[0].input, # input image to xception layer itself not my wrapper model
53 | [layer.output for layer in motion_model_restored.layers[-1].layers[-2:]] # two outputs of xception layer itself visual features, softmax output
54 | )
55 |
56 | motion_model_with_2_outputs = Model(
57 | motion_model_restored.inputs[0], # input of my wrapper model
58 | xception_rebuilt(motion_model_restored.layers[1](motion_model_restored.inputs[0])) # the two outputs obtained from xception layer are connected to the original input of the wrapper model
59 |
60 | )
61 |
62 | data_loader = frame_dataloader.MotionDataLoaderVisualFeature(
63 | num_workers=workers, samples_per_video=19,
64 | width=int(motion_model_restored.inputs[0].shape[1]), height=int(motion_model_restored.inputs[0].shape[2])
65 | , use_multiprocessing=True, augmenter_level=0, # heavy augmentation
66 | )
67 | train_loader, test_loader = data_loader.run()
68 |
69 | """
70 | Evaluate and check
71 | """
72 | if evaluate:
73 | progress = tqdm.tqdm(test_loader, total=len(test_loader))
74 | inp = Input(shape=(2048,), name="dense")
75 | dense_layer = Model(inp, motion_model_restored.layers[-1].layers[-1](inp))
76 |
77 | video_level_preds_np = np.zeros((len(progress), num_actions)) # each video per 101 class (prediction)
78 | video_level_labels_np = np.zeros((len(progress), 1))
79 |
80 | for index, (video_frames, video_label) in enumerate(progress): # i don't need frame level labels
81 | feature_field, frame_preds = motion_model_with_2_outputs.predict_on_batch(video_frames)
82 | assert np.allclose(frame_preds, dense_layer.predict(feature_field))
83 |
84 | video_level_preds_np[index, :] = np.mean(frame_preds, axis=0)
85 | video_level_labels_np[index, 0] = video_label
86 |
87 | video_level_loss, video_level_accuracy_1, video_level_accuracy_5 = keras.backend.get_session().run(
88 | [val_loss_op, acc_top_1_op, acc_top_5_op], feed_dict={video_level_labels_k: video_level_labels_np, video_level_preds_k: video_level_preds_np})
89 |
90 | print("Motion Model validation", "prec@1", video_level_accuracy_1, "prec@5", video_level_accuracy_5, "loss", video_level_loss)
91 |
92 | """
93 | Generate the data and save into pickles
94 | """
95 | ##############################################################################
96 | # test data generation
97 | if generate_test:
98 | test_progress = tqdm.tqdm(test_loader, total=len(test_loader))
99 |
100 | samples, labels = np.zeros([len(test_loader), testing_samples_per_video, feature_field_size], dtype=np.float32), np.zeros([len(test_loader), ], dtype=np.float32)
101 |
102 | last_access = 0
103 | for index, (video_frames, video_label) in enumerate(test_progress): # i don't need frame level labels
104 | feature_field, _ = motion_model_with_2_outputs.predict_on_batch(video_frames)
105 | samples[index] = feature_field
106 | labels[index] = video_label
107 | last_access = index
108 |
109 | print("test samples:", samples.shape)
110 | print("test labels:", labels.shape)
111 | assert last_access == len(test_progress) - 1
112 |
113 | with open("test_features_motion.pickle", 'wb') as f:
114 | pickle.dump((samples, labels), f)
115 |
116 | del samples, labels
117 | drive_manager.upload_project_file("test_features_motion.pickle")
118 |
119 | ##############################################################################
120 | # train data generation
121 | for epoch in range(1):
122 | train_progress = tqdm.tqdm(train_loader, total=len(train_loader))
123 | samples, labels = np.zeros([len(train_loader), testing_samples_per_video, feature_field_size], dtype=np.float32), np.zeros([len(train_loader), ], dtype=np.float32)
124 |
125 | last_access = 0
126 | for index, (video_frames, video_label) in enumerate(train_progress): # i don't need frame level labels
127 | feature_field, _ = motion_model_with_2_outputs.predict_on_batch(video_frames)
128 | samples[index] = feature_field
129 | labels[index] = video_label
130 | last_access = index
131 |
132 | print("train samples:", samples.shape)
133 | print("train labels:", labels.shape)
134 | assert last_access == len(train_loader) - 1
135 |
136 | with open("train_features_motion.pickle", 'wb') as f:
137 | pickle.dump((samples, labels), f)
138 |
139 | del samples, labels
140 | drive_manager.upload_project_file("train_features_motion.pickle")
141 | ##############################################################################
142 |
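A minimal sketch of reading one of the dumped pickles back; the file name and shapes follow the arrays written by this script:

import pickle

with open("train_features_motion.pickle", "rb") as f:
    samples, labels = pickle.load(f)

print(samples.shape)   # (num_train_videos, 19, 2048) float32 feature fields
print(labels.shape)    # (num_train_videos,) float32 class labels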
--------------------------------------------------------------------------------
/generate_spatial_feature_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Here I'm generating visual features from my pretrained spatial stream having 8% top-1 accuracy
6 | these visual features come from the layer just below the softmax prediction; that layer has 2048 features for each input image
7 | the generated data is stored in a big list and then dumped as a pickle file for each epoch of data (more data augmentation)
8 | this data is then fed into a recurrent network implemented in recurrent_fusion_trainer.py to train a video-level classifier instead of a frame-level classifier
9 |
10 | I expect a pre-trained xception model here to be downloaded from drive
11 | -------------------
12 | In this file I do what I call model surgery, which means removing or adding some layers of a model
13 | Here I load my trained model whose architecture is
14 |
15 | Input_image > batch_norm >>> ...... very deep series of convs from xception model ... >>> softmax layer of 101 classes
16 |
17 | so I can change the model a little bit and make it have 2 outputs which are the features just below the softmax and the softmax
18 | so the model becomes
19 |
20 | Input_image > batch_norm >>> ...... very deep series of convs from xception model ... >>> softmax layer of 101 classes
21 | # >>> feature field of 2048 features
22 |
23 | which are two outputs now
24 | """
25 | import pickle
26 |
27 | from tensorflow.keras.models import Model
28 | from tensorflow.python.keras import Input
29 |
30 | import frame_dataloader
31 | from evaluation import legacy_load_model
32 | from evaluation.evaluation import *
33 | from utils.drive_manager import DriveManager
34 |
35 | #####################################################
36 | feature_field_size = 2048
37 | testing_samples_per_video = 19
38 | #####################################################
39 |
40 | """Managed"""
41 | evaluate = False
42 | generate_test = False
43 |
44 | drive_manager = DriveManager("spatial_feature_dataset")
45 | drive_manager.download_file('17O8JdvaSNJFmbvZtQPIBYNLgM9Um-znf', "spatial.zip")
46 | spatial_model_restored = legacy_load_model(filepath="spatial.h5", custom_objects={'sparse_categorical_cross_entropy_loss': sparse_categorical_cross_entropy_loss, "acc_top_1": acc_top_1, "acc_top_5": acc_top_5})
47 |
48 | spatial_model_restored.summary()
49 |
50 | spatial_model_with_2_outputs = Model(
51 | spatial_model_restored.inputs, # input image
52 | [layer.output for layer in spatial_model_restored.layers[-2:]] # visual features, softmax output
53 | )
54 |
55 | data_loader = frame_dataloader.SpatialDataLoaderFeature(
56 | num_workers=workers, samples_per_video=19,
57 | width=int(spatial_model_restored.inputs[0].shape[1]), height=int(spatial_model_restored.inputs[0].shape[2])
58 | , use_multiprocessing=True, augmenter_level=0, # heavy augmentation
59 | )
60 | train_loader, test_loader = data_loader.run()
61 |
62 | """
63 | Evaluate and check
64 | """
65 | if evaluate:
66 | progress = tqdm.tqdm(test_loader, total=len(test_loader))
67 | inp = Input(shape=(2048,), name="dense")
68 | dense_layer = Model(inp, spatial_model_restored.layers[-1](inp))
69 |
70 | video_level_preds_np = np.zeros((len(progress), num_actions)) # each video per 101 class (prediction)
71 | video_level_labels_np = np.zeros((len(progress), 1))
72 |
73 | for index, (video_frames, video_label) in enumerate(progress): # i don't need frame level labels
74 | feature_field, frame_preds = spatial_model_with_2_outputs.predict_on_batch(video_frames)
75 |
76 | assert np.allclose(frame_preds, dense_layer.predict(feature_field))
77 | video_level_preds_np[index, :] = np.mean(frame_preds, axis=0)
78 | video_level_labels_np[index, 0] = video_label
79 |
80 | video_level_loss, video_level_accuracy_1, video_level_accuracy_5 = keras.backend.get_session().run(
81 | [val_loss_op, acc_top_1_op, acc_top_5_op], feed_dict={video_level_labels_k: video_level_labels_np, video_level_preds_k: video_level_preds_np})
82 |
83 | print("Spatial Model validation", "prec@1", video_level_accuracy_1, "prec@5", video_level_accuracy_5, "loss", video_level_loss)
84 |
85 | """
86 | Generate the data and save into pickles
87 | """
88 | ##############################################################################
89 | # test data generation
90 | if generate_test:
91 | test_progress = tqdm.tqdm(test_loader, total=len(test_loader))
92 |
93 | samples, labels = np.zeros([len(test_loader), testing_samples_per_video, feature_field_size], dtype=np.float32), np.zeros([len(test_loader), ], dtype=np.float32)
94 |
95 | last_access = 0
96 | for index, (video_frames, video_label) in enumerate(test_progress): # i don't need frame level labels
97 | feature_field, _ = spatial_model_with_2_outputs.predict_on_batch(video_frames)
98 | samples[index] = feature_field
99 | labels[index] = video_label
100 | last_access = index
101 |
102 | print("test samples:", samples.shape)
103 | print("test labels:", labels.shape)
104 | assert last_access == len(test_progress) - 1
105 |
106 | with open("test_features_spatial.pickle", 'wb') as f:
107 | pickle.dump((samples, labels), f)
108 |
109 | del samples, labels
110 | drive_manager.upload_project_file("test_features_spatial.pickle")
111 |
112 | ##############################################################################
113 | # train data generation
114 | for epoch in range(1):
115 | train_progress = tqdm.tqdm(train_loader, total=len(train_loader))
116 | samples, labels = np.zeros([len(train_loader), testing_samples_per_video, feature_field_size], dtype=np.float32), np.zeros([len(train_loader), ], dtype=np.float32)
117 |
118 | last_access = 0
119 | for index, (video_frames, video_label) in enumerate(train_progress): # i don't need frame level labels
120 | feature_field, _ = spatial_model_with_2_outputs.predict_on_batch(video_frames)
121 | samples[index] = feature_field
122 | labels[index] = video_label
123 | last_access = index
124 |
125 | print("train samples:", samples.shape)
126 | print("train labels:", labels.shape)
127 | assert last_access == len(train_loader) - 1
128 |
129 | with open("train_features_spatial.pickle", 'wb') as f:
130 | pickle.dump((samples, labels), f)
131 |
132 | del samples, labels
133 | drive_manager.upload_project_file("train_features_spatial.pickle")
134 | ##############################################################################
135 |
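# A minimal sketch (not part of the original script) of reading the pickles written above:
# each pickle stores a (samples, labels) tuple where samples has shape
# [num_videos, samples_per_video, feature_field_size] and labels has shape [num_videos].
import pickle

with open("test_features_spatial.pickle", "rb") as f:
    loaded_samples, loaded_labels = pickle.load(f)
print(loaded_samples.shape, loaded_labels.shape)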
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | """
--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/models/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/models/__pycache__/spatial_models.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/models/__pycache__/spatial_models.cpython-36.pyc
--------------------------------------------------------------------------------
/models/motion_models.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | This contains two keras models used as the motion stream:
6 | the motion stream expects data tensors in the form [batch_size, height, width, stacked_frames (u/v = 10*2)]
7 | 1) Xception model
8 | 2) resnet 50
9 | """
10 | """
11 | To understand what is going on, have a look at https://keras.io/applications/
12 | """
13 |
14 | import h5py
15 | import numpy as np
16 | import tensorflow.keras.backend as K
17 |
18 |
19 | from tensorflow.keras.applications.resnet50 import ResNet50
20 |
21 | from tensorflow.keras.layers import *
22 | #
23 | from tensorflow.keras.models import Model
24 |
25 | from tensorflow.python.keras.applications.xception import Xception
26 | from tensorflow.python.keras.engine.saving import load_attributes_from_hdf5_group
27 | from tensorflow.python.keras.utils import get_file
28 |
29 | # from keras.applications.resnet50 import WEIGHTS_PATH_NO_TOP can't be imported in newer versions so I copied it
30 | WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
31 |
32 | # from keras_applications.xception import TF_WEIGHTS_PATH_NO_TOP can't be imported in newer versions so I copied it
33 | TF_WEIGHTS_PATH_NO_TOP = (
34 | 'https://github.com/fchollet/deep-learning-models/'
35 | 'releases/download/v0.4/'
36 | 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5')
37 |
38 | def is_same_shape(shape1, shape2):
39 | """Checks if two structures[could be list or single value for example] have the same shape"""
40 | if len(shape1) != len(shape2):
41 | return False
42 | else:
43 | for i in range(len(shape1)):
44 | if shape1[i] != shape2[i]:
45 | return False
46 |
47 | return True
48 |
49 |
50 | # This piece of code is inspired by keras source
51 | def compare_layers_weights(first_model_layers, second_model_layers):
52 | """Compare layers weights: I use them to test the pre trained models are loaded correctly"""
53 | for i in range(len(first_model_layers)):
54 | weights1 = first_model_layers[i].get_weights()
55 | weights2 = second_model_layers[i].get_weights()
56 | if len(weights1) == len(weights2):
57 | if not all([is_same_shape(weights2[w].shape, weights1[w].shape) and np.allclose(weights2[w], weights1[w]) for w in range(len(weights1))]):
58 | print(first_model_layers[i].name, "!=", second_model_layers[i].name)
59 | else:
60 | print(first_model_layers[i].name, "!=", second_model_layers[i].name)
61 |
62 |
63 | # This piece of code is inspired by keras source
64 | def get_symbolic_filtered_layer_weights_from_model(model):
65 | """For the given model get the symbolic(tensors) weights"""
66 | symbolic_weights = []
67 | for layer in model.layers:
68 | if layer.weights:
69 | symbolic_weights.append(layer.weights)
70 | return symbolic_weights # now you can load those weights with tensorflow feed
71 |
72 |
73 | # This piece of code is inspired by keras source
74 | def get_named_layer_weights_from_h5py(h5py_file):
75 | """decodes h5py for a given model downloaded by keras and gets layer weight name to value mapping"""
76 | with h5py.File(h5py_file) as h5py_stream:
77 | layer_names = load_attributes_from_hdf5_group(h5py_stream, 'layer_names')
78 |
79 | weights_values = []
80 | for name in layer_names:
81 | layer = h5py_stream[name]
82 | weight_names = load_attributes_from_hdf5_group(layer, 'weight_names')
83 | if weight_names:
84 | weight_values = [np.asarray(layer[weight_name]) for weight_name in weight_names]
85 | weights_values.append((name, weight_values))
86 | return weights_values
87 |
88 |
89 | # This piece of code is inspired by keras source
90 | def load_layer_weights(weight_values, symbolic_weights):
91 | """loads weight_values which is a list ot tuples from get_named_layer_weights_from_h5py()
92 | into symbolic_weights obtained from get_symbolic_filtered_layer_weights_from_model()
93 | """
94 | if len(weight_values) != len(symbolic_weights): # they must have the same length of layers
95 | raise ValueError('number of weights aren\'t equal', len(weight_values), len(symbolic_weights))
96 | else: # similar to keras source code :D .. load_weights_from_hdf5_group
97 | print("length of layers to load", len(weight_values))
98 | weight_value_tuples = []
99 |
100 | # load layer by layer weights
101 | for i in range(len(weight_values)): # list(layers) i.e. list of lists(weights)
102 | assert len(symbolic_weights[i]) == len(weight_values[i][1])
103 | # symbolic_weights[i] : list of symbolic names for layer i
104 | # weight_values[i][1] : list of weight ndarrays for layer i
105 | weight_value_tuples += zip(symbolic_weights[i], weight_values[i][1]) # both are lists with equal lengths (name,value) mapping
106 |
107 | K.batch_set_value(weight_value_tuples) # loaded a batch to be efficient
108 |
109 |
110 | def cross_modality_init(in_channels, kernel):
111 | """
112 | Takes a kernel trained on RGB input and produces a new weight to be used by the motion stream, which needs in_channels (about 20) channels!
113 | kernel is (x, y, 3, 64)
114 | """
115 | # if in_channels == 3: # no reason for cross modality
116 | # return kernel
117 | print("cross modality kernel", kernel.shape)
118 | avg_kernel = np.mean(kernel, axis=2) # mean (x, y, 64)
119 | weight_init = np.expand_dims(avg_kernel, axis=2) # mean (x, y, 1, 64)
120 | return np.tile(weight_init, (1, 1, in_channels, 1)) # mean (x, y, in_channels, 64)
121 |
122 |
123 | def CrossModalityResNet50(num_classes, pre_trained, cross_modality_pre_training, input_shape):
124 | """Pretrained Resnet50 model from keras which uses cross modality pretraining to obtain a convolution weight which suits 20 channels needed by motion stream"""
125 | cross_modality_pre_training = cross_modality_pre_training and pre_trained
126 |
127 | # create the model
128 | model = ResNet50(classes=num_classes, weights=None, input_shape=input_shape, include_top=True)
129 | channels = input_shape[2]
130 |
131 | # load weight file >>> downloads some file from github
132 | weights_path = get_file(
133 | 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
134 | WEIGHTS_PATH_NO_TOP,
135 | cache_subdir='models',
136 | md5_hash='a268eb855778b3df3c7506639542a6af')
137 |
138 | # get the named weights of each layer
139 | weight_values_ = get_named_layer_weights_from_h5py(weights_path)
140 | # get the symbolic weights of each layer
141 | symbolic_weights_ = get_symbolic_filtered_layer_weights_from_model(model)[:len(weight_values_)]
142 |
143 | if cross_modality_pre_training: # use a pretrained convolution weight
144 | # update it (name,[kernel,bias])
145 | # cross modality pre-training for kernel
146 | # leave bias as is of course
147 | weight_values_[0] = ("conv1_cross_modality",
148 | [cross_modality_init(kernel=weight_values_[0][1][0], in_channels=channels), # 0 = first layer , 1 = weight_value , 0 = kernel
149 | weight_values_[0][1][1]] # 0 = first layer , 1 = weight_value , 1 = bias
150 | )
151 |
152 | else: # start the first convolution layer as random glorot
153 | symbolic_weights_ = symbolic_weights_[1:]
154 | weight_values_ = weight_values_[1:]
155 |
156 | if pre_trained:
157 | # do weight loading
158 | load_layer_weights(weight_values=weight_values_, symbolic_weights=symbolic_weights_)
159 |
160 | return model
161 |
162 |
163 | class ResNet50MotionCNN:
164 | """
165 | ResNet model used for motion stream which is (input layer >> norm layer >> resnet50 model)
166 | """
167 | """
168 | pretrained+adam:
169 | scratch+adam:
170 |
171 | pretrained+MSGD:80%
172 | scratch+MSGD:
173 | """
174 |
175 | def __init__(self, num_classes, is_tesla_k80, stacked_frames, pre_trained=True, cross_modality_pre_training=True):
176 | self.is_teslaK80 = is_tesla_k80
177 | # input layer
178 | self.inputs = Input(shape=(224, 224, 2 * stacked_frames), name="input_motion")
179 |
180 | # data normalization
181 | self.data_norm = BatchNormalization(3, name='data_norm', center=False, scale=False)
182 | # create the base pre-trained model
183 | self.resnet = CrossModalityResNet50(num_classes=num_classes, pre_trained=pre_trained, cross_modality_pre_training=cross_modality_pre_training, input_shape=(224, 224, 2 * stacked_frames))
184 |
185 | def get_keras_model(self):
186 | # keras functional api
187 | return Model(self.inputs, self.resnet(self.data_norm(self.inputs)), name="motion_resnet")
188 |
189 | def get_loader_configs(self):
190 | return {"width": 224, "height": 224, "batch_size": 28 if self.is_teslaK80 else 24}
191 |
192 |
193 | def CrossModalityXception(num_classes, pre_trained, cross_modality_pre_training, input_shape, include_feature_fields=False):
194 | cross_modality_pre_training = cross_modality_pre_training and pre_trained
195 |
196 | # create the model
197 | model = Xception(classes=num_classes, weights=None, input_shape=input_shape, include_top=True)
198 | channels = input_shape[2]
199 |
200 | # load weight file >>> downloads some file from github
201 | weights_path = get_file(
202 | 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5',
203 | TF_WEIGHTS_PATH_NO_TOP,
204 | cache_subdir='models',
205 | file_hash='b0042744bf5b25fce3cb969f33bebb97')
206 |
207 | weight_values_ = get_named_layer_weights_from_h5py(weights_path)
208 | symbolic_weights_ = get_symbolic_filtered_layer_weights_from_model(model)[:len(weight_values_)]
209 |
210 | if cross_modality_pre_training: # use a pretrained convolution weight
211 | # update it (name,[kernel,bias])
212 | # cross modality pre-training for kernel
213 | # leave bias as is of course
214 | weight_values_[0] = ("conv1_cross_modality",
215 | [cross_modality_init(kernel=weight_values_[0][1][0], in_channels=channels), # 0 = first layer , 1 = weight_value , 0 = kernel
216 | # Xception has no bias
217 | ]
218 | )
219 |
220 | else: # start the first convolution layer as random glorot
221 | symbolic_weights_ = symbolic_weights_[1:]
222 | weight_values_ = weight_values_[1:]
223 |
224 | if pre_trained:
225 | # do weight loading
226 | load_layer_weights(weight_values=weight_values_, symbolic_weights=symbolic_weights_)
227 |
228 | if include_feature_fields:
229 | return Model(model.inputs, [layer.output for layer in model.layers[-2:]])
230 | else:
231 | return model
232 |
233 |
234 | class XceptionMotionCNN:
235 | """
236 | Xception model used for motion stream which is (input layer >> norm layer >> xception model)
237 | """
238 | """
239 | pretrained+adam: 84.4%
240 | scratch+adam:
241 |
242 | pretrained+MSGD:
243 | scratch+MSGD:
244 | """
245 |
246 | def __init__(self, num_classes, is_tesla_k80, stacked_frames, pre_trained=True, cross_modality_pre_training=True, include_feature_fields=False):
247 | self.is_teslaK80 = is_tesla_k80
248 | # input layer
249 | self.inputs = Input(shape=(299, 299, 2 * stacked_frames), name="input_motion")
250 | # data normalization
251 | self.data_norm = BatchNormalization(3, name='data_norm', center=False, scale=False)
252 |
253 | # create the base pre-trained model
254 | self.xception = CrossModalityXception(num_classes=num_classes, cross_modality_pre_training=cross_modality_pre_training, pre_trained=pre_trained, input_shape=(299, 299, 2 * stacked_frames), include_feature_fields=include_feature_fields)
255 |
256 | def get_keras_model(self):
257 | # keras functional api
258 | return Model(self.inputs, self.xception(self.data_norm(self.inputs)), name="motion_xception")
259 |
260 | def get_loader_configs(self):
261 | return {"width": 299, "height": 299, "batch_size": 28 if self.is_teslaK80 else 28}
262 |
263 |
264 | if __name__ == '__main__':
265 | # test :D
266 | model1 = ResNet50MotionCNN(num_classes=101, stacked_frames=10, is_tesla_k80=True).get_keras_model()
267 | model2 = ResNet50MotionCNN(num_classes=101, stacked_frames=10, is_tesla_k80=True, cross_modality_pre_training=False).get_keras_model()
268 | model3 = ResNet50()
269 | print(model1.layers)
270 | print(model2.layers)
271 | print(model3.layers)
272 | print(" ")
273 | compare_layers_weights(model1.layers[2].layers, model2.layers[2].layers)  # layers[2] is the wrapped resnet sub-model
274 | print(" ")
275 | compare_layers_weights(model3.layers, model2.layers[2].layers)
276 | print(" ")
277 | compare_layers_weights(model3.layers, model1.layers[2].layers)
278 | print(" ")
279 | 
280 | print("xception test")
281 | model4 = Xception(input_shape=(299, 299, 3))
282 | model5 = XceptionMotionCNN(num_classes=101, is_tesla_k80=True, stacked_frames=10).get_keras_model()
283 | 
284 | print(model4.layers)
285 | print(model5.layers)
286 | compare_layers_weights(model4.layers, model5.layers[2].layers)  # layers[2] is the wrapped xception sub-model
287 | 
288 | print("values")
289 | print(model4.layers[1].weights)
290 | print(model4.layers[1].get_weights()[0][0, 0, :, 0])
291 | print(model5.layers[2].layers[1].get_weights()[0][0, 0, :, 0])
292 |
--------------------------------------------------------------------------------
/models/spatial_models.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | This contains four keras models used as the spatial stream:
6 | the spatial stream expects data tensors in the form [batch_size, height, width, 3]
7 | 1) Xception model
8 | 2) resnet 50
9 | 3) VGG19
10 | 4) MobileNet
11 | """
12 | """
13 | To understand what is going on, have a look at https://keras.io/applications/
14 | """
15 | from tensorflow.keras.applications.mobilenet import MobileNet
16 | from tensorflow.keras.applications.resnet50 import ResNet50
17 | from tensorflow.keras.applications.vgg19 import VGG19
18 | from tensorflow.keras.applications.xception import Xception
19 | from tensorflow.keras.models import Model
20 | from tensorflow.python.keras import Input
21 | from tensorflow.python.keras.layers import Reshape, Activation, Dropout, GlobalAveragePooling2D, Conv2D, Flatten, Dense, BatchNormalization
22 |
23 |
24 | class ResNet50SpatialCNN:
25 | """
26 | ResNet model used for spatial stream which is
27 | (input layer >> norm layer >> resnet50 without prediction layers (look at keras docs https://keras.io/applications/) >> flattening >> softmax projection)
28 | """
29 | """
30 | pretrained+adam: 80 ~ 81.2
31 | scratch+adam: 0.42215174 !!! imagenet pre training is really important
32 |
33 | pretrained+MSGD: 78.5 ~ 80
34 | scratch+MSGD:
35 | """
36 |
37 | def __init__(self, num_classes, is_tesla_k80, pre_trained=True):
38 | self.is_teslaK80 = is_tesla_k80
39 |
40 | # input layer
41 | self.inputs = Input(shape=(224, 224, 3), name="input_spatial")
42 | # data normalization
43 | self.data_norm = BatchNormalization(3, name='data_norm', center=False, scale=False)
44 |
45 | # create the base pre-trained model
46 | self.resnet = ResNet50(weights='imagenet' if pre_trained else None, include_top=False)
47 |
48 | # print(self.base_model.get_layer('avg_pool').__dict__)
49 | self.flat = Flatten(name="flatten")
50 |
51 | # self.drop_out_fc = keras.layers.Dropout(.75)
52 | self.fc_custom = Dense(num_classes, name="fc_custom", activation="softmax")
53 |
54 | def get_keras_model(self):
55 | # keras functional api
56 | def model(inputs):
57 | return self.fc_custom(self.flat(self.resnet(self.data_norm(inputs))))
58 |
59 | return Model(self.inputs, model(self.inputs), name="spatial_resnet50")
60 |
61 | def get_loader_configs(self):
62 | return {"width": 224, "height": 224, "batch_size": 76 if self.is_teslaK80 else 48}
63 |
64 |
65 | class XceptionSpatialCNN:
66 | """
67 | Xception model used for spatial stream which is
68 | (input layer >> norm layer >> xception without prediction layers (look at keras docs https://keras.io/applications/) >> GlobalAveragePooling2D >> softmax projection)
69 | """
70 | """
71 | pretrained+adam: 86.12% <3
72 | scratch+adam:
73 |
74 | pretrained+MSGD:82%
75 | scratch+MSGD:
76 | """
77 |
78 | def __init__(self, num_classes, is_tesla_k80, pre_trained=True):
79 | self.is_teslaK80 = is_tesla_k80
80 | # input layer
81 | self.inputs = Input(shape=(299, 299, 3), name="input_spatial")
82 | # data normalization
83 | self.data_norm = BatchNormalization(3, name='data_norm', center=False, scale=False)
84 |
85 | # create the base pre-trained model
86 | self.xception = Xception(weights='imagenet' if pre_trained else None, include_top=False, input_shape=(299, 299, 3))
87 |
88 | self.GlobalAveragePooling2D = GlobalAveragePooling2D(name='avg_pool')
89 |
90 | # self.drop_out_fc = keras.layers.Dropout(.75)
91 | self.fc_custom = Dense(num_classes, name="predictions", activation="softmax")
92 |
93 | def get_keras_model(self):
94 | # print(inputs)
95 | def model(inputs):
96 | return self.fc_custom(self.GlobalAveragePooling2D(self.xception(self.data_norm(inputs))))
97 |
98 | return Model(self.inputs, model(self.inputs), name="spatial_xception")
99 |
100 | def get_loader_configs(self):
101 | return {"width": 299, "height": 299, "batch_size": 28 if self.is_teslaK80 else 28} # 28
102 |
103 |
104 | class VGGSpatialCNN:
105 | """
106 | VGG19 model used for spatial stream which is
107 | (input layer >> norm layer >> VGG19 without prediction layers (look at keras docs https://keras.io/applications/) >> flatten >> fc1(4096) >> fc2(4096) >> softmax projection)
108 | """
109 | """
110 | pretrained+adam:
111 | scratch+adam:
112 |
113 | pretrained+MSGD: 70%
114 | scratch+MSGD:
115 | """
116 |
117 | def __init__(self, num_classes, is_tesla_k80, pre_trained=True):
118 | self.is_teslaK80 = is_tesla_k80
119 | # input layer
120 | self.inputs = Input(shape=(224, 224, 3), name="input_spatial")
121 | # data normalization
122 | self.data_norm = BatchNormalization(3, name='data_norm', center=False, scale=False)
123 |
124 | # create the base pre-trained model
125 | self.vgg19_no_top = VGG19(weights='imagenet' if pre_trained else None, include_top=False)
126 |
127 | self.flat = Flatten(name='flatten')
128 | self.Dense_1 = Dense(4096, activation='relu', name='fc1')
129 | self.Dense_2 = Dense(4096, activation='relu', name='fc2')
130 | self.Dense_3 = Dense(num_classes, activation='softmax', name='predictions')
131 |
132 | def get_keras_model(self):
133 | # print(inputs)
134 | def model(inputs):
135 | x = self.vgg19_no_top(self.data_norm(inputs))
136 | x = self.flat(x)
137 | x = self.Dense_1(x)
138 | x = self.Dense_2(x)
139 | prediction = self.Dense_3(x)
140 | return prediction
141 |
142 | return Model(self.inputs, model(self.inputs), name="spatial_vgg19")
143 |
144 | def get_loader_configs(self):
145 | return {"width": 224, "height": 224, "batch_size": 40 if self.is_teslaK80 else 40}
146 |
147 |
148 | class MobileSpatialCNN:
149 | """
150 | MobileNet model used for spatial stream which is
151 | (input layer >> norm layer >> MobileNet without prediction layers (look at keras docs https://keras.io/applications/) >> GlobalAveragePooling2D >> reshape >> dropout >> 1x1 conv >> softmax projection)
152 | """
153 | """
154 | pretrained+adam:
155 | scratch+adam:
156 |
157 | pretrained+MSGD:
158 | scratch+MSGD:
159 | """
160 |
161 | def __init__(self, num_classes, is_tesla_k80, alpha=1, dropout=1e-3, pre_trained=True):
162 | self.is_teslaK80 = is_tesla_k80
163 |
164 | # input layer
165 | self.inputs = Input(shape=(224, 224, 3), name="input_spatial")
166 | # data normalization
167 | self.data_norm = BatchNormalization(3, name='data_norm', center=False, scale=False)
168 |
169 | # create the base pre-trained model
170 | self.mobile_net = MobileNet(weights='imagenet' if pre_trained else None, include_top=False)
171 |
172 | self.GlobalAveragePooling2D = GlobalAveragePooling2D()
173 |
174 | shape = (1, 1, int(1024 * alpha))
175 | self.Reshape_1 = Reshape(shape, name='reshape_1')
176 | self.Dropout = Dropout(dropout, name='dropout')
177 | self.Conv2D = Conv2D(num_classes, (1, 1), padding='same', name='conv_preds')
178 | self.Activation = Activation('softmax', name='act_softmax')
179 | self.Reshape_2 = Reshape((num_classes,), name='reshape_2')
180 |
181 | def get_keras_model(self):
182 | def model(inputs):
183 | x = self.mobile_net(self.data_norm(inputs))
184 | x = self.GlobalAveragePooling2D(x)
185 | x = self.Reshape_1(x)
186 | x = self.Dropout(x)
187 | x = self.Conv2D(x)
188 | x = self.Activation(x)
189 | prediction = self.Reshape_2(x)
190 | return prediction
191 |
192 | return Model(self.inputs, model(self.inputs), name="spatial_mobilenet")
193 |
194 | def get_loader_configs(self):
195 | return {"width": 224, "height": 224, "batch_size": 100 if self.is_teslaK80 else 100}
196 |
--------------------------------------------------------------------------------
/motion_trainer.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Here I'm training the motion stream CNN in the following steps:
6 | 1. load configs from configs.motion_configs (indicating architecture/optimizer/lr/pretrained..)
7 | 2. initialize your dataloader >> feeding the data efficiently to the model
8 | 3. load the latest snapshot of the model from drive (it's public and will be downloaded for you).
9 | note: folders on my drive are identified by their experiment_identifier,
10 | for example heavy-mot-xception-adam-1e-05-imnet means (heavy augmentation, motion stream, xception architecture, adam optimizer with lr = 1e-05, pretrained on imagenet)
11 | this long experiment_identifier is given to the drive manager, the matching checkpoint is downloaded automatically, and training continues from that checkpoint
12 | view my experiments: https://drive.google.com/drive/folders/1B82anWV8Mb4iHYmOp9tIR9aOTlfllwsD
13 | to run your own experiments on your drive you will need to modify DriveManager at utils.drive_manager and use some other long experiment_identifier,
14 | for example personal.heavy-mot-xception-adam-1e-05-imnet, by setting the suffix at line 33
15 | 
16 | 4. depending on whether a checkpoint is found or not, the trainer continues from where it stopped (the checkpoint) or starts from scratch
17 | 
18 | note: validation is done by MotionValidationCallback which validates on the evaluation split of the given dataset
19 | """
20 | from functools import partial
21 |
22 | import frame_dataloader
23 | import utils.training_utils as eval_globals
24 | from configs.motion_configs import *
25 | from evaluation import legacy_load_model, get_batch_size
26 | from evaluation.evaluation import *
27 | from models.motion_models import *
28 | from utils import log, get_augmenter_text
29 | from utils.drive_manager import DriveManager
30 |
31 | ################################################################################
32 | """Files, paths & identifier"""
33 | suffix = "" # put your name or anything(your crush :3) :D
34 | experiment_identifier = suffix + ("" if suffix == "" else "-") + get_augmenter_text(augmenter_level) + "-mot-" + model_name + "-" + ("adam" if is_adam else "SGD") + "-" + str(lr) + "-" + ("imnet" if pretrained else "scrat")
35 | log_file = "motion.log"
36 | log_stream = open("motion.log", "a")
37 | h5py_file = "motion.h5"
38 | pred_file = "motion.preds"
39 | ################################################################################
40 | """Checking latest"""
41 | print(experiment_identifier)
42 | num_actions = 101
43 | print("Number of workers:", workers, file=log_stream)
44 | drive_manager = DriveManager(experiment_identifier)
45 | checkpoint_found, zip_file_name = drive_manager.get_latest_snapshot()
46 | ################################################################################
47 | # you need to send it as callback before keras reduce on plateau
48 | MotionValidationCallback = partial(eval_globals.get_validation_callback,
49 | log_stream=log_stream,
50 | validate_every=validate_every,
51 | testing_samples_per_video=testing_samples_per_video,
52 | pred_file=pred_file, h5py_file=h5py_file, drive_manager=drive_manager, log_file=log_file)
53 |
54 | data_loader = partial(frame_dataloader.MotionDataLoader,
55 | testing_samples_per_video=testing_samples_per_video,
56 | augmenter_level=augmenter_level,
57 | log_stream=log_stream, stacked_frames=stacked_frames)
58 |
59 | if checkpoint_found:
60 | # restore the model from the checkpoint
61 | log("Model restored")
62 | eval_globals.best_video_level_accuracy_1 = float(zip_file_name.split("-")[1])
63 | log("Current Best", eval_globals.best_video_level_accuracy_1)
64 |
65 | motion_model_restored = legacy_load_model(filepath=h5py_file, custom_objects={'sparse_categorical_cross_entropy_loss': sparse_categorical_cross_entropy_loss, "acc_top_1": acc_top_1, "acc_top_5": acc_top_5})
66 | # init data loader
67 | train_loader, test_loader, test_video_level_label = data_loader(width=int(motion_model_restored.inputs[0].shape[1]),
68 | height=int(motion_model_restored.inputs[0].shape[2]),
69 | batch_size=get_batch_size(motion_model_restored,
70 | spatial=False)).run()
71 |
72 | # training
73 | motion_model_restored.fit_generator(train_loader,
74 | steps_per_epoch=len(train_loader), # generates a batch per step
75 | epochs=epochs,
76 | use_multiprocessing=False, workers=workers,
77 | # validation_data=gen_test(), validation_steps=len(test_loader.dataset)
78 | callbacks=[MotionValidationCallback(model=motion_model_restored, test_loader=test_loader, test_video_level_label=test_video_level_label), # returns callback instance
79 | keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=validate_every, verbose=1)],
80 | initial_epoch=int(zip_file_name.split("-")[0])) # get epoch number
81 |
82 | else:
83 | # init the model from scratch
84 | log("Starting from scratch")
85 |
86 | if model_name == "resnet":
87 | model = ResNet50MotionCNN(num_classes=num_actions,
88 | is_tesla_k80=is_tesla_k80,
89 | pre_trained=True if pretrained else False,
90 | stacked_frames=stacked_frames)
91 | elif model_name == "xception":
92 | model = XceptionMotionCNN(num_classes=num_actions,
93 | is_tesla_k80=is_tesla_k80,
94 | pre_trained=True if pretrained else False,
95 | stacked_frames=stacked_frames)
96 |
97 | # noinspection PyUnboundLocalVariable
98 | keras_motion_model = model.get_keras_model()
99 |
100 | # init data loader
101 | train_loader, test_loader, test_video_level_label = data_loader(**model.get_loader_configs()).run() # batch_size, width , height)
102 |
103 | keras_motion_model.compile(optimizer=keras.optimizers.Adam(lr=lr) if is_adam else keras.optimizers.SGD(lr=lr, momentum=0.9),
104 | loss=sparse_categorical_cross_entropy_loss,
105 | metrics=[acc_top_1, acc_top_5])
106 |
107 | keras_motion_model.summary(print_fn=lambda *args: print(args, file=log_stream))
108 | keras_motion_model.summary()
109 | log_stream.flush()
110 |
111 | # training
112 | keras_motion_model.fit_generator(train_loader,
113 | steps_per_epoch=len(train_loader), # generates a batch per step
114 | epochs=epochs,
115 | use_multiprocessing=False, workers=workers,
116 | # validation_data=gen_test(), validation_steps=len(test_loader.dataset)
117 | callbacks=[MotionValidationCallback(model=keras_motion_model, test_loader=test_loader, test_video_level_label=test_video_level_label), # returns callback instance
118 | keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=validate_every * 10, verbose=1)],
119 | )
120 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Action Recognition [no longer maintained]
2 |
3 |
4 |
5 |
6 |
7 | In this repo we study the problem of action recognition (recognizing actions in videos) on the well-known UCF101 dataset.
8 | 
9 | Here, I reimplemented the two-stream approach for action recognition using pre-trained Xception networks for both streams (see the references below).
10 |
11 | # Live demo on Colab
12 | Just clone the **Live Demo Two-steam net.ipynb** notebook to your drive and run the cells on Google Colab (something like the demo gif will be generated in video format).
13 |
14 | # Get started:
15 | A full demo of the code in this repo can be found in the **Action_Recognition_Walkthrough.ipynb** notebook.
16 | 
17 | Please clone the **Action_Recognition_Walkthrough.ipynb** notebook to your drive account and run it on Google Colab on a python3 GPU-enabled instance.
18 |
19 | ## Environment and requirements:
20 | This code requires python 3.6 and the following packages:
21 | ```
22 | Tensorflow 1.11.0 (GPU enabled-the code uses keras associated with Tensorflow)
23 | Imgaug 0.2.6
24 | opencv 3.4.2.17
25 | numpy 1.14.1
26 | ```
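A quick sanity check of my own (not part of the repo) to confirm an instance matches these versions:
```
import cv2
import imgaug
import numpy
import tensorflow as tf

# expected roughly: 1.11.0, 0.2.6, 3.4.2(.17), 1.14.1
print(tf.__version__, imgaug.__version__, cv2.__version__, numpy.__version__)
```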
27 | All of these requirements are satisfied by a python3 GPU-enabled Colab instance; just use it and the notebook **Action_Recognition_Walkthrough.ipynb** will install the rest :)
28 |
29 |
30 | ## Dataset:
31 | I used the UCF101 dataset, originally found [here](https://www.crcv.ucf.edu/datasets/human-actions/ucf101/UCF101.rar).
32 | 
33 | The dataset is also available pre-processed and published by [feichtenhofer/twostreamfusion](https://github.com/feichtenhofer/twostreamfusion):
34 | * RGB images (a single zip file split into three parts)
35 | ```
36 | wget http://ftp.tugraz.at/pub/feichtenhofer/tsfusion/data/ucf101_jpegs_256.zip.001
37 | wget http://ftp.tugraz.at/pub/feichtenhofer/tsfusion/data/ucf101_jpegs_256.zip.002
38 | wget http://ftp.tugraz.at/pub/feichtenhofer/tsfusion/data/ucf101_jpegs_256.zip.003
39 | ```
40 | * Optical Flow u/v frames (a single zip file split into three parts)
41 | ```
42 | wget http://ftp.tugraz.at/pub/feichtenhofer/tsfusion/data/ucf101_tvl1_flow.zip.001
43 | wget http://ftp.tugraz.at/pub/feichtenhofer/tsfusion/data/ucf101_tvl1_flow.zip.002
44 | wget http://ftp.tugraz.at/pub/feichtenhofer/tsfusion/data/ucf101_tvl1_flow.zip.003
45 | ```
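Each archive is a single zip byte-split into three `.zip.00x` parts, so the parts have to be joined before extraction. A minimal Python sketch for the RGB archive (assuming the parts are a plain byte-split, which the `.zip.001` naming suggests; the flow archive works the same way):
```
import glob
import shutil
import zipfile

# concatenate the numbered parts back into a single zip, then extract it
parts = sorted(glob.glob("ucf101_jpegs_256.zip.0*"))
with open("ucf101_jpegs_256.zip", "wb") as merged:
    for part in parts:
        with open(part, "rb") as chunk:
            shutil.copyfileobj(chunk, merged)

with zipfile.ZipFile("ucf101_jpegs_256.zip") as archive:
    archive.extractall("jpegs_256")
```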
46 |
47 | ## Code Features:
48 | * A variety of models that you can switch between easily.
49 | * Saves checkpoints at regular intervals; those checkpoints are synchronized to google drive using the Drive API, which means you can resume training anywhere from any Google Colab instance.
50 | * Accesses the public models on my drive, so you can resume and fine-tune them at different time stamps.
51 | The name of every checkpoint follows the pattern **EPOCH-BEST_TOP_1_ACC-CURRENT_TOP_1_ACC** (see the small parsing sketch below),
52 | for example [this](https://drive.google.com/open?id=1N697z8uvAHICBbFNOJyKn4nbT64rUTcB)
53 | which is **300-0.84298-0.84166.zip** in folder **heavy-mot-xception-adam-1e-05-imnet**
54 | at this checkpoint,
55 | * **epoch=300**
56 | * **best top 1 accuracy was 0.84298** (obtained in a checkpoint before 300)
57 | * **the current accuracy is 0.84166**
58 | * in the experiment **heavy-mot-xception-adam-1e-05-imnet**
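As a rough illustration of that naming convention, here is a tiny helper of my own (not part of the repo) that parses such a checkpoint name:
```
def parse_checkpoint_name(name):
    """Split 'EPOCH-BEST_TOP_1_ACC-CURRENT_TOP_1_ACC.zip' into its three fields."""
    epoch, best_acc, current_acc = name.replace(".zip", "").split("-")
    return int(epoch), float(best_acc), float(current_acc)

print(parse_checkpoint_name("300-0.84298-0.84166.zip"))  # (300, 0.84298, 0.84166)
```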
59 | ## Models:
60 | I used models pre-trained on imagenet, provided by keras applications [here](https://keras.io/applications/).
61 | 
62 | The best results are obtained using the Xception architecture.
63 |
64 |
65 | Network | Top1-Acc |
66 | --------------|:-------:|
67 | Spatial VGG19 stream | ~75% |
68 | Spatial Resnet50 stream | 81.2% |
69 | Spatial Xception stream | 86.04% |
70 | Motion Resnet50 stream | ~75% |
71 | Motion Xception stream | 84.4% |
72 | Average fusion | **91.25%** |
73 | Recurrent network fusion | **91.7%** |
77 |
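For reference, a minimal numpy sketch of what average fusion means here: the per-video softmax scores of the two streams are averaged before taking the argmax (the random arrays below are just placeholders for the real stream outputs):
```
import numpy as np

# placeholders for the [num_videos, 101] softmax outputs of the two streams
spatial_probs = np.random.rand(3783, 101)
spatial_probs /= spatial_probs.sum(axis=1, keepdims=True)
motion_probs = np.random.rand(3783, 101)
motion_probs /= motion_probs.sum(axis=1, keepdims=True)

fused = (spatial_probs + motion_probs) / 2.0  # average the class scores
video_level_pred = fused.argmax(axis=1)       # predicted class per video
```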
78 | ## Pre-trained Model
79 | All the pre-trained models could be found [here](https://drive.google.com/drive/folders/1B82anWV8Mb4iHYmOp9tIR9aOTlfllwsD).
80 |
81 | It's the same drive folder accessed by the code while training and resuming training from a checkpoint.
82 |
83 | ## Reference Papers:
84 | * [[1] Two-stream convolutional networks for action recognition in videos](https://arxiv.org/pdf/1406.2199.pdf)
85 | * [[2] Real-time Action Recognition with Enhanced Motion Vector CNNs](https://arxiv.org/pdf/1604.07669.pdf)
86 | * [[3] Towards Good Practices for Very Deep Two-Stream ConvNets](https://arxiv.org/pdf/1507.02159.pdf)
87 |
88 |
89 | ## Nice implementations of two-stream approach:
90 | * [[1] A nice two-stream reimplementation in pytorch using resnets](https://github.com/jeffreyhuang1/two-stream-action-recognition)
91 | My code is inspired by this repo.
92 | * [[2] Two-stream-pytorch](https://github.com/bryanyzhu/two-stream-pytorch)
93 | * [[3] Hidden-Two-Stream](https://github.com/bryanyzhu/Hidden-Two-Stream)
94 |
95 |
96 | ## Future directions:
97 | * [[1] Hidden-Two-stream](https://arxiv.org/pdf/1704.00389.pdf)
98 | Achieves real-time performance by using a deep neural net to generate the optical flow.
99 | * [[2] Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs and ImageNet?](https://arxiv.org/pdf/1711.09577.pdf)
100 | Discusses how 3D convolutions are a strong architecture for videos and how Kinetics pre-training could retrace imagenet pre-training.
101 | * [[3] Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset](https://arxiv.org/pdf/1705.07750.pdf)
102 |
103 | ## Useful links:
104 | * [[1] awesome-action-recognition](https://github.com/jinwchoi/awesome-action-recognition)
105 |
--------------------------------------------------------------------------------
/recurrent_fusion_trainer.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Here I'm training a video-level network based on recurrent networks (per-frame CNN features are stacked into a 3d tensor and fed to an RNN):
6 | 1. setting configs (when both streams are used, the concatenated feature size is 4096 = 2048 * 2)
7 | 2. generating experiment_identifier and creating files
8 | 3. downloading pickled data from drive; each pickled file is a big numpy array whose shape is [instances, samples per video, features (2048 or 4096)]
9 | """
10 | import glob
11 | import pickle
12 | import random
13 | import shutil
14 |
15 | from tensorflow.keras import backend as K
16 | from tensorflow.python.keras import Model
17 | from tensorflow.python.keras.layers import Dense, Softmax, GRU
18 |
19 | import utils.training_utils as eval_globals
20 | from evaluation.evaluation import *
21 | from utils import log
22 | from utils.drive_manager import DriveManager
23 |
24 | ################################################################################
25 | """Configs"""
26 | lr = 1e-6
27 | hidden_state = 128
28 | feature_field = 2048
29 | testing_samples_per_video = 19
30 | epochs = 1200
31 | save_every = 25
32 | batch_size = 64
33 |
34 | num_training_samples = 9537
35 | num_testing_samples = 3783
36 |
37 | is_spatial = True
38 | is_motion = True
39 |
40 | if is_spatial and is_motion:
41 | feature_field *= 2
42 | ################################################################################
43 | """Files, paths & identifier"""
44 | suffix = "" # put your name or anything :D
45 | experiment_identifier = suffix + "recurrent_fusion_selu_atten_simple" + str(lr)
46 | ################
47 | log_file = experiment_identifier + ".log"
48 | log_stream = open(log_file, "a")
49 | checkpoint_dir = "./fusion/"
50 | checkpoints = checkpoint_dir + "fusion_chk"
51 | try:
52 | shutil.rmtree(checkpoint_dir)
53 | except:
54 | pass
55 | drive_manager = DriveManager(experiment_identifier)
56 | checkpoint_found, zip_file_name = drive_manager.get_latest_snapshot()
57 | ################################################################################
58 | """sanity check"""
59 | if not is_motion and not is_spatial:
60 | exit()
61 | ################################################################################
62 | """Downloads the files and makes sure files aren't re-downloaded with every run if no one is missed"""
63 | if is_spatial:
64 | drive_manager_spatial = DriveManager("spatial_feature_dataset")
65 | test_spatial = drive_manager_spatial.search_file("test_features_spatial.pickle")
66 | train_spatial = drive_manager_spatial.search_file("train_features_spatial.pickle")
67 |
68 | if len(test_spatial) == 0:
69 | print("Please run 'generate_spatial_feature_dataset.py' and generate 'test_features_spatial.pickle'..this file will be saved to your drive in '/spatial_feature_dataset'".format(drive_manager_spatial.personal_dfolder))
70 | exit()
71 |
72 | if len(train_spatial) == 0:
73 | print("Please run 'generate_spatial_feature_dataset.py' and generate 'train_features_spatial.pickle'..those files will be saved to your drive in '/spatial_feature_dataset'".format(drive_manager_spatial.personal_dfolder))
74 | exit()
75 |
76 | drive_manager_spatial.download_file(test_spatial[0]["id"], "test_features_spatial.pickle", unzip=False)
77 |
78 | if len(glob.glob("train_features_spatial.pickle*")) != len(train_spatial):
79 | drive_manager_spatial.download_files_list(train_spatial, False, False)
80 |
81 | if is_motion:
82 | drive_manager_motion = DriveManager("motion_feature_dataset")
83 |
84 | test_motion = drive_manager_motion.search_file("test_features_motion.pickle")
85 | train_motion = drive_manager_motion.search_file("train_features_motion.pickle")
86 |
87 | if len(test_motion) == 0:
88 | print("Please run 'generate_motion_feature_dataset.py' and generate 'test_features_motion.pickle'..this file will be saved to your drive in '/motion_feature_dataset'".format(drive_manager_motion.personal_dfolder))
89 | exit()
90 |
91 | if len(train_motion) == 0:
92 | print("Please run 'generate_motion_feature_dataset.py' and generate 'train_features_motion.pickle'..those files will be saved to your drive in '/motion_feature_dataset'".format(drive_manager_motion.personal_dfolder))
93 | exit()
94 |
95 | drive_manager_motion.download_file(test_motion[0]["id"], "test_features_motion.pickle", unzip=False)
96 |
97 | if len(glob.glob("train_features_motion.pickle*")) != len(train_motion):
98 | drive_manager_motion.download_files_list(train_motion, False, False)
99 | ################################################################################
100 | seen_spatial_files = set()
101 | seen_motion_files = set()
102 |
103 |
104 | def train_generator():
105 | while True:
106 | train_samples_spatial, train_labels_spatial, train_samples_motion, train_labels_motion = [0] * 4
107 | """Choose file to read while being downloaded then read files"""
108 |
109 | """load spatial data"""
110 | if is_spatial:
111 | spatial_features_files = glob.glob("train_features_spatial.pickle*")
112 | if len(spatial_features_files) == len(seen_spatial_files):
113 | seen_spatial_files.clear()
114 |
115 | while True:
116 | spatial_features_file = random.sample(spatial_features_files, k=1)[0]
117 | if spatial_features_file not in seen_spatial_files:
118 |
119 | try:
120 | with open(spatial_features_file, 'rb') as f:
121 | train_samples_spatial, train_labels_spatial = pickle.load(f)
122 |
123 | # print("chose:", spatial_features_file)
124 | seen_spatial_files.add(spatial_features_file)
125 | break
126 | except:
127 | pass
128 |
129 | """load motion data"""
130 | if is_motion:
131 | motion_features_files = glob.glob("train_features_motion.pickle*")
132 | if len(motion_features_files) == len(seen_motion_files):
133 | seen_motion_files.clear()
134 |
135 | while True:
136 | motion_features_file = random.sample(motion_features_files, k=1)[0]
137 | if motion_features_file not in seen_motion_files:
138 |
139 | try:
140 | with open(motion_features_file, 'rb') as f:
141 | train_samples_motion, train_labels_motion = pickle.load(f)
142 |
143 | # print("chose:", motion_features_file)
144 | seen_motion_files.add(motion_features_file)
145 | break
146 | except:
147 | pass
148 |
149 | """generation loop"""
150 | permutation = list(range((num_training_samples + batch_size - 1) // batch_size))
151 | random.shuffle(permutation)
152 |
153 | if is_spatial != is_motion: # xor
154 | # single stream motion or spatial
155 | if is_spatial:
156 | train_samples, train_labels = train_samples_spatial, train_labels_spatial
157 | assert train_samples_spatial.shape[0] == num_training_samples
158 | else:
159 | train_samples, train_labels = train_samples_motion, train_labels_motion
160 | assert train_samples_motion.shape[0] == num_training_samples
161 |
162 | for batch_index in permutation:
163 | yield train_samples[batch_index * batch_size:(batch_index + 1) * batch_size], train_labels[batch_index * batch_size:(batch_index + 1) * batch_size]
164 | else:
165 | # concatenate samples from motion and spatial
166 | assert np.allclose(train_labels_spatial, train_labels_motion)
167 | assert train_samples_spatial.shape[0] == num_training_samples
168 | assert train_samples_motion.shape[0] == num_training_samples
169 |
170 | for batch_index in permutation:
171 | yield np.concatenate([train_samples_spatial[batch_index * batch_size:(batch_index + 1) * batch_size], train_samples_motion[batch_index * batch_size:(batch_index + 1) * batch_size]], axis=2), train_labels_spatial[batch_index * batch_size:(batch_index + 1) * batch_size]
172 |
173 |
174 | def test_generator():
175 | """load spatial test data"""
176 | if is_spatial:
177 | with open("test_features_spatial.pickle", 'rb') as f:
178 | test_samples_spatial, test_labels_spatial = pickle.load(f)
179 |
180 | """load motion test data"""
181 | if is_motion:
182 | with open("test_features_motion.pickle", 'rb') as f:
183 | test_samples_motion, test_labels_motion = pickle.load(f)
184 |
185 | while True:
186 | if is_spatial != is_motion: # xor
187 | # single stream motion or spatial
188 | if is_spatial:
189 | # noinspection PyUnboundLocalVariable
190 | test_samples, test_labels = test_samples_spatial, test_labels_spatial
191 | assert test_samples_spatial.shape[0] == num_testing_samples
192 | else:
193 | # noinspection PyUnboundLocalVariable
194 | test_samples, test_labels = test_samples_motion, test_labels_motion
195 | assert test_samples_motion.shape[0] == num_testing_samples
196 |
197 | for batch_index in range((test_samples.shape[0] + batch_size - 1) // batch_size):
198 | yield test_samples[batch_index * batch_size:(batch_index + 1) * batch_size], test_labels[batch_index * batch_size:(batch_index + 1) * batch_size]
199 |
200 | else:
201 | # concatenate samples from motion and spatial
202 | assert np.allclose(test_labels_motion, test_labels_spatial)
203 | assert test_samples_spatial.shape[0] == num_testing_samples
204 | assert test_samples_motion.shape[0] == num_testing_samples
205 |
206 | for batch_index in range((num_testing_samples + batch_size - 1) // batch_size):
207 | yield np.concatenate([test_samples_spatial[batch_index * batch_size:(batch_index + 1) * batch_size], test_samples_motion[batch_index * batch_size:(batch_index + 1) * batch_size]], axis=2), test_labels_spatial[batch_index * batch_size:(batch_index + 1) * batch_size]
208 |
209 |
210 | class saver_callback(tf.keras.callbacks.Callback):
211 | """
212 | save checkpoints with the tensorflow saver, not h5py, since my model implementation uses the subclass api, not the functional api >> a functional-api implementation is left as a TODO
213 | also logs the model state and uploads the files
214 | """
215 |
216 | def on_epoch_end(self, epoch, logs={}):
217 | epoch_one_based = epoch + 1
218 | if epoch_one_based % save_every == 0 and epoch_one_based > 0:
219 | log("=" * 100 + "\n(Training:)Epoch", epoch_one_based, "prec@1", logs["acc_top_1"], "prec@5", logs["acc_top_5"], "loss", logs["loss"], file=log_stream)
220 | log("(Validation:)Epoch", epoch_one_based, "prec@1", logs["val_acc_top_1"], "prec@5", logs["val_acc_top_5"], "loss", logs["val_loss"], file=log_stream)
221 |
222 | if logs["val_acc_top_1"] > eval_globals.best_video_level_accuracy_1:
223 | log("Epoch", epoch_one_based, "Established new baseline:", logs["val_acc_top_1"], file=log_stream)
224 | eval_globals.best_video_level_accuracy_1 = logs["val_acc_top_1"]
225 |
226 | # save the model and pickle
227 | #
228 | else:
229 | log("Epoch", epoch_one_based, "Baseline:", eval_globals.best_video_level_accuracy_1, "but got:", logs["val_acc_top_1"], file=log_stream)
230 |
231 | saver.save(tf.keras.backend.get_session(), checkpoints)
232 |
233 | drive_manager.upload_project_files(
234 | files_list=[log_file],
235 | dir_list=[checkpoint_dir],
236 | snapshot_name=str(epoch_one_based) + "-" + "{0:.5f}".format(eval_globals.best_video_level_accuracy_1) + "-" + "{0:.5f}".format(logs["val_acc_top_1"]))
237 |
238 |
239 | class Model(tf.keras.Model):
240 | def __init__(self):
241 | super(Model, self).__init__()
242 |
243 | self.gru_1 = GRU(hidden_state, return_sequences=True, input_shape=(testing_samples_per_video, feature_field), dropout=.5) # recurrent layer
244 | # self.gru_2 = GRU(hidden_state, return_sequences=True)
245 |
246 | self.attention_layer = Dense(1) # gets attention weight for time step
247 | self.attention_normalizer = Softmax(axis=1) # normalizes the 3d tensor to give weight for each time step
248 |
249 | self.FC_1 = Dense(hidden_state // 2, activation='selu')
250 | # recurrent_fusion_model.add(BatchNormalization())
251 | # self.FC_2 = Dense(hidden_state // 4, activation='selu')
252 | # self.BN_1 = BatchNormalization()
253 | self.classification_layer = Dense(num_actions, activation='softmax')
254 |
255 | def call(self, input_visual_feature, training=None, mask=None):
256 | internal = self.gru_1(input_visual_feature)  # returns a sequence of vectors of dimension hidden_state
257 | # in self-attention I will return_sequences of course
258 | # internal = self.gru_2(internal) # returns a sequence of vectors of dimension feature_field
259 |
260 | un_normalized_attention_weights = self.attention_layer(internal)
261 | normalized_attention_weights = self.attention_normalizer(un_normalized_attention_weights) # normalize on timesteps dimension
262 | internal = normalized_attention_weights * internal
263 | print(internal)
264 | attention_vector = K.sum(internal, axis=1) # sum on timesteps
265 | print(attention_vector)
266 | # recurrent_fusion_model.add(Dense(hidden_state // 2, activation='relu'))
267 | # recurrent_fusion_model.add(BatchNormalization())
268 | internal = self.FC_1(attention_vector)
269 | # internal = self.FC_2(internal)
270 | final_output = self.classification_layer(internal)
271 |
272 | return final_output
273 |
274 |
275 | # create the model
276 | recurrent_fusion_model = Model()
277 | recurrent_fusion_model.compile(optimizer=keras.optimizers.Adam(lr=lr), loss=sparse_categorical_cross_entropy_loss, metrics=[acc_top_1, acc_top_5])
278 |
279 | # build internal tensors
280 | recurrent_fusion_model.fit(*next(train_generator()), batch_size=1, epochs=1, verbose=0)
281 |
282 | # get tensorflow saver ready > will be used if a checkpoint found on drive
283 | saver = tf.train.Saver(recurrent_fusion_model.variables)
284 |
285 | if checkpoint_found:
286 | # restore the model from the checkpoint
287 | log("Model restored")
288 | eval_globals.best_video_level_accuracy_1 = float(zip_file_name.split("-")[1])
289 | log("Current Best", eval_globals.best_video_level_accuracy_1)
290 |
291 | saver.restore(tf.keras.backend.get_session(), checkpoints) # use tensorflow saver
292 | initial_epoch = int(zip_file_name.split("-")[0]) # get epoch number
293 | else:
294 | # init the model from scratch, it's already done
295 | log("Starting from scratch")
296 | # expected input data shape: (batch_size, timesteps, data_dim)
297 | recurrent_fusion_model.summary()
298 | initial_epoch = 0
299 |
300 | # training
301 | recurrent_fusion_model.fit_generator(train_generator(), use_multiprocessing=False,
302 | epochs=epochs, steps_per_epoch=(num_training_samples + batch_size - 1) // batch_size,
303 | validation_data=test_generator(), validation_steps=(num_testing_samples + batch_size - 1) // batch_size,
304 | callbacks=[saver_callback(), keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=50, verbose=1, min_lr=lr / 10)],
305 | initial_epoch=initial_epoch)
306 |
--------------------------------------------------------------------------------
/spatial_trainer.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Here I'm training the spatial stream CNN in the following steps:
6 | 1. load configs from configs.spatial_configs (indicating architecture/optimizer/lr/pretrained..)
7 | 2. initialize your dataloader >> feeding the data efficiently to the model
8 | 3. load the latest snapshot of the model from drive (it's public and will be downloaded for you).
9 | note: folders on my drive are identified by their experiment_identifier,
10 | for example heavy-spa-xception-adam-1e-05-imnet means (heavy augmentation, spatial stream, xception architecture, adam optimizer with lr = 1e-05, pretrained on imagenet)
11 | this long experiment_identifier is given to the drive manager, the matching checkpoint is downloaded automatically, and training continues from that checkpoint
12 | view my experiments: https://drive.google.com/drive/folders/1B82anWV8Mb4iHYmOp9tIR9aOTlfllwsD
13 | to run your own experiments on your drive you will need to modify DriveManager at utils.drive_manager and use some other long experiment_identifier,
14 | for example personal.heavy-spa-xception-adam-1e-05-imnet, by setting the suffix at line 33
15 | 
16 | 4. depending on whether a checkpoint is found or not, the trainer continues from where it stopped (the checkpoint) or starts from scratch
17 | 
18 | note: validation is done by SpatialValidationCallback which validates on the evaluation split of the given dataset
19 | """
20 | from functools import partial
21 |
22 | import frame_dataloader
23 | import utils.training_utils as eval_globals
24 | from configs.spatial_configs import *
25 | from evaluation import legacy_load_model, get_batch_size
26 | from evaluation.evaluation import *
27 | from models.spatial_models import *
28 | from utils import get_augmenter_text
29 | from utils.drive_manager import DriveManager
30 |
31 | ################################################################################
32 | """Files, paths & identifier"""
33 | suffix = "test" # put your name or anything(your crush :3) :D
34 | experiment_identifier = suffix + ("" if suffix == "" else "-") + get_augmenter_text(augmenter_level) + "-spa-" + model_name + "-" + ("adam" if is_adam else "SGD") + "-" + str(lr) + "-" + ("imnet" if pretrained else "scrat")
35 | log_file = "spatial.log"
36 | log_stream = open("spatial.log", "a")
37 | h5py_file = "spatial.h5"
38 | pred_file = "spatial.preds"
39 | ################################################################################
40 | """Checking latest"""
41 | print(experiment_identifier)
42 | num_actions = 101
43 | print("Number of workers:", workers, file=log_stream)
44 | drive_manager = DriveManager(experiment_identifier)
45 | checkpoint_found, zip_file_name = drive_manager.get_latest_snapshot()
46 | ################################################################################
47 | # you need to send it as callback before keras reduce on plateau
48 | SpatialValidationCallback = partial(eval_globals.get_validation_callback,
49 | log_stream=log_stream,
50 | validate_every=validate_every,
51 | testing_samples_per_video=testing_samples_per_video,
52 | pred_file=pred_file, h5py_file=h5py_file, drive_manager=drive_manager, log_file=log_file)
53 |
54 | data_loader = partial(frame_dataloader.SpatialDataLoader,
55 | testing_samples_per_video=testing_samples_per_video,
56 | augmenter_level=augmenter_level,
57 | log_stream=log_stream)
58 |
59 | if checkpoint_found:
60 | # restore the model
61 | print("Model restored")
62 | eval_globals.best_video_level_accuracy_1 = float(zip_file_name.split("-")[1])
63 | print("Current Best", eval_globals.best_video_level_accuracy_1)
64 | spatial_model_restored = legacy_load_model(filepath=h5py_file, custom_objects={'sparse_categorical_cross_entropy_loss': sparse_categorical_cross_entropy_loss, "acc_top_1": acc_top_1, "acc_top_5": acc_top_5})
65 |
66 | # init data loader
67 | train_loader, test_loader, test_video_level_label = data_loader(width=int(spatial_model_restored.inputs[0].shape[1]), height=int(spatial_model_restored.inputs[0].shape[2]), batch_size=get_batch_size(spatial_model_restored, spatial=True)).run()
68 |
69 | # training
70 | spatial_model_restored.fit_generator(train_loader,
71 | steps_per_epoch=len(train_loader), # generates a batch per step
72 | epochs=epochs,
73 | use_multiprocessing=False, workers=workers,
74 | # validation_data=gen_test(), validation_steps=len(test_loader.dataset)
75 | callbacks=[SpatialValidationCallback(model=spatial_model_restored, test_loader=test_loader, test_video_level_label=test_video_level_label), # returns callback instance
76 | keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=validate_every, verbose=1)],
77 | initial_epoch=int(zip_file_name.split("-")[0])) # get epoch number
78 |
79 | else:
80 | # init the model
81 | print("Starting from scratch")
82 |
83 | if model_name == "resnet":
84 | model = ResNet50SpatialCNN(num_classes=num_actions, is_tesla_k80=is_tesla_k80, pre_trained=True if pretrained else False)
85 | elif model_name == "xception":
86 | model = XceptionSpatialCNN(num_classes=num_actions, is_tesla_k80=is_tesla_k80, pre_trained=True if pretrained else False)
87 | elif model_name == "vgg":
88 | model = VGGSpatialCNN(num_classes=num_actions, is_tesla_k80=is_tesla_k80, pre_trained=True if pretrained else False)
89 | elif model_name == "mobilenet":
90 | model = MobileSpatialCNN(num_classes=num_actions, is_tesla_k80=is_tesla_k80, pre_trained=True if pretrained else False)
91 |
92 | # noinspection PyUnboundLocalVariable
93 | keras_spatial_model = model.get_keras_model()
94 |
95 | # init data loader
96 | train_loader, test_loader, test_video_level_label = data_loader(**model.get_loader_configs()).run() # batch_size, width , height)
97 |
98 | keras_spatial_model.compile(optimizer=keras.optimizers.Adam(lr=lr) if is_adam else keras.optimizers.SGD(lr=lr, momentum=0.9), loss=sparse_categorical_cross_entropy_loss, metrics=[acc_top_1, acc_top_5])
99 |
100 | keras_spatial_model.summary(print_fn=lambda *args: print(args, file=log_stream))
101 | keras_spatial_model.summary()
102 | log_stream.flush()
103 |
104 | # training
105 | keras_spatial_model.fit_generator(train_loader,
106 | steps_per_epoch=len(train_loader), # generates a batch per step
107 | epochs=epochs,
108 | use_multiprocessing=False, workers=workers,
109 | # validation_data=gen_test(), validation_steps=len(test_loader.dataset)
110 | callbacks=[SpatialValidationCallback(model=keras_spatial_model, test_loader=test_loader, test_video_level_label=test_video_level_label), # returns callback instance
111 | keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=validate_every, verbose=1)],
112 | )
113 |
--------------------------------------------------------------------------------
/testing video samples/v_Archery_g02_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Archery_g02_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_BabyCrawling_g18_c06.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BabyCrawling_g18_c06.avi
--------------------------------------------------------------------------------
/testing video samples/v_BabyCrawling_g19_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BabyCrawling_g19_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_BalanceBeam_g08_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BalanceBeam_g08_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_BalanceBeam_g13_c05.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BalanceBeam_g13_c05.avi
--------------------------------------------------------------------------------
/testing video samples/v_BasketballDunk_g22_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BasketballDunk_g22_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_BenchPress_g01_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BenchPress_g01_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_Biking_g01_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Biking_g01_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_Biking_g10_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Biking_g10_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_Biking_g19_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Biking_g19_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_Biking_g20_c06.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Biking_g20_c06.avi
--------------------------------------------------------------------------------
/testing video samples/v_Billiards_g15_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Billiards_g15_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_BlowDryHair_g07_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BlowDryHair_g07_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_BlowDryHair_g13_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BlowDryHair_g13_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_BodyWeightSquats_g01_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BodyWeightSquats_g01_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_BodyWeightSquats_g04_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BodyWeightSquats_g04_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_Bowling_g22_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Bowling_g22_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_BoxingPunchingBag_g01_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BoxingPunchingBag_g01_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_BoxingPunchingBag_g18_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BoxingPunchingBag_g18_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_BoxingSpeedBag_g04_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BoxingSpeedBag_g04_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_BoxingSpeedBag_g09_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BoxingSpeedBag_g09_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_BoxingSpeedBag_g12_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BoxingSpeedBag_g12_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_BoxingSpeedBag_g23_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BoxingSpeedBag_g23_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_BreastStroke_g03_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BreastStroke_g03_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_BrushingTeeth_g17_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BrushingTeeth_g17_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_BrushingTeeth_g20_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_BrushingTeeth_g20_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_CliffDiving_g02_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_CliffDiving_g02_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_CricketBowling_g02_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_CricketBowling_g02_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_CuttingInKitchen_g20_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_CuttingInKitchen_g20_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_CuttingInKitchen_g25_c05.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_CuttingInKitchen_g25_c05.avi
--------------------------------------------------------------------------------
/testing video samples/v_Diving_g02_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Diving_g02_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_Diving_g03_c07.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Diving_g03_c07.avi
--------------------------------------------------------------------------------
/testing video samples/v_Diving_g04_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Diving_g04_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_Diving_g16_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Diving_g16_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_Diving_g20_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Diving_g20_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_Fencing_g15_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Fencing_g15_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_Fencing_g15_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Fencing_g15_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_FieldHockeyPenalty_g11_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_FieldHockeyPenalty_g11_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_FieldHockeyPenalty_g13_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_FieldHockeyPenalty_g13_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_FrontCrawl_g23_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_FrontCrawl_g23_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_Haircut_g07_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Haircut_g07_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_HammerThrow_g10_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_HammerThrow_g10_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_HammerThrow_g23_c05.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_HammerThrow_g23_c05.avi
--------------------------------------------------------------------------------
/testing video samples/v_Hammering_g12_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Hammering_g12_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_Hammering_g17_c05.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Hammering_g17_c05.avi
--------------------------------------------------------------------------------
/testing video samples/v_HighJump_g02_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_HighJump_g02_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_HighJump_g19_c05.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_HighJump_g19_c05.avi
--------------------------------------------------------------------------------
/testing video samples/v_HorseRace_g24_c05.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_HorseRace_g24_c05.avi
--------------------------------------------------------------------------------
/testing video samples/v_JavelinThrow_g05_c05.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_JavelinThrow_g05_c05.avi
--------------------------------------------------------------------------------
/testing video samples/v_JavelinThrow_g21_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_JavelinThrow_g21_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_JavelinThrow_g22_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_JavelinThrow_g22_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_JavelinThrow_g23_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_JavelinThrow_g23_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_JavelinThrow_g24_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_JavelinThrow_g24_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_Kayaking_g12_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Kayaking_g12_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_Knitting_g20_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Knitting_g20_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_LongJump_g04_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_LongJump_g04_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_LongJump_g15_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_LongJump_g15_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_LongJump_g15_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_LongJump_g15_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_MoppingFloor_g03_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_MoppingFloor_g03_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_PizzaTossing_g01_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PizzaTossing_g01_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_PizzaTossing_g14_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PizzaTossing_g14_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_PizzaTossing_g18_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PizzaTossing_g18_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_PlayingCello_g02_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PlayingCello_g02_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_PlayingDaf_g10_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PlayingDaf_g10_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_PlayingDhol_g17_c06.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PlayingDhol_g17_c06.avi
--------------------------------------------------------------------------------
/testing video samples/v_PlayingFlute_g05_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PlayingFlute_g05_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_PlayingGuitar_g22_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PlayingGuitar_g22_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_PlayingTabla_g14_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PlayingTabla_g14_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_PoleVault_g04_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PoleVault_g04_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_PommelHorse_g17_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_PommelHorse_g17_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_Punch_g22_c07.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Punch_g22_c07.avi
--------------------------------------------------------------------------------
/testing video samples/v_RockClimbingIndoor_g09_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_RockClimbingIndoor_g09_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_RockClimbingIndoor_g11_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_RockClimbingIndoor_g11_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_RockClimbingIndoor_g25_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_RockClimbingIndoor_g25_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_RopeClimbing_g01_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_RopeClimbing_g01_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_RopeClimbing_g04_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_RopeClimbing_g04_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_Rowing_g14_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Rowing_g14_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_Rowing_g24_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Rowing_g24_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_SalsaSpin_g12_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_SalsaSpin_g12_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_ShavingBeard_g03_c05.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_ShavingBeard_g03_c05.avi
--------------------------------------------------------------------------------
/testing video samples/v_ShavingBeard_g24_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_ShavingBeard_g24_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_Shotput_g13_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Shotput_g13_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_Skiing_g14_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Skiing_g14_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_Skijet_g07_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Skijet_g07_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_SkyDiving_g05_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_SkyDiving_g05_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_SoccerPenalty_g17_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_SoccerPenalty_g17_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_StillRings_g03_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_StillRings_g03_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_StillRings_g18_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_StillRings_g18_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_Surfing_g05_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Surfing_g05_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_Surfing_g17_c07.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Surfing_g17_c07.avi
--------------------------------------------------------------------------------
/testing video samples/v_Swing_g14_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Swing_g14_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_TennisSwing_g14_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_TennisSwing_g14_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_ThrowDiscus_g02_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_ThrowDiscus_g02_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_Typing_g16_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_Typing_g16_c03.avi
--------------------------------------------------------------------------------
/testing video samples/v_VolleyballSpiking_g17_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_VolleyballSpiking_g17_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_WalkingWithDog_g15_c01.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_WalkingWithDog_g15_c01.avi
--------------------------------------------------------------------------------
/testing video samples/v_WallPushups_g01_c04.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_WallPushups_g01_c04.avi
--------------------------------------------------------------------------------
/testing video samples/v_WallPushups_g04_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_WallPushups_g04_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_WritingOnBoard_g11_c02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_WritingOnBoard_g11_c02.avi
--------------------------------------------------------------------------------
/testing video samples/v_YoYo_g25_c03.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/testing video samples/v_YoYo_g25_c03.avi
--------------------------------------------------------------------------------
/upload.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # get drive credentials files
4 | cp -a "/media/mohammed-alaa/Core/current tasks/Storage/drive/." ./utils
5 |
6 | # create zipped file of the code
7 | zip upload.zip -r utils/*.txt *.py */*.py
8 |
9 | # upload the zipped file to transfer.sh (curl prints the download link)
10 | curl --upload-file ./upload.zip https://transfer.sh/upload.zip --silent
11 |
12 | # clean up the archive and credential files (the download link was already printed by curl above)
13 | rm upload.zip
14 |
15 | rm ./utils/cred*.txt
16 | printf "\n"
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | Some helper functions for logging and named constants
6 | """
7 |
8 | import sys
9 |
10 |
11 | def log(*args, file=None):
12 | """log to a file and console"""
13 | if file:
14 | print(*args, file=file)
15 | file.flush()
16 | print(*args)
17 | sys.stdout.flush()
18 |
19 |
20 | def get_augmenter_text(augmenter_level):
21 | """augmenter level text"""
22 | if augmenter_level == 0:
23 | augmenter_text = "heavy"
24 | elif augmenter_level == 1:
25 | augmenter_text = "medium"
26 | else: # 2
27 | augmenter_text = "simple"
28 |
29 | return augmenter_text
30 |
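
A minimal, hypothetical usage sketch of the helpers in utils/__init__.py above; "training.log" is a placeholder file name. log mirrors its arguments to stdout and, when given a file handle, to that file as well, flushing both.

# Sketch only: log a message to both the console and a log file.
from utils import log, get_augmenter_text

with open("training.log", "a") as log_stream:  # placeholder log file
    # prints "augmenter level: medium" to the console and appends it to training.log
    log("augmenter level:", get_augmenter_text(1), file=log_stream)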
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/drive_manager.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/utils/__pycache__/drive_manager.cpython-36.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/training_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/utils/__pycache__/training_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/zip_manager.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mohammed-elkomy/two-stream-action-recognition/803128ed2d765d987bc2429514ba974c0c58a7f4/utils/__pycache__/zip_manager.cpython-36.pyc
--------------------------------------------------------------------------------
/utils/training_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Created by mohammed-alaa
3 | """
4 | import pickle
5 |
6 | import tensorflow as tf
7 |
8 | from evaluation.evaluation import eval_model
9 | from utils import log
10 |
11 | """Evaluation best value over the course of training"""
12 | best_video_level_accuracy_1 = 0
13 | last_video_level_loss = 5.0
14 |
15 |
16 | def get_validation_callback(log_stream, validate_every, model, test_loader, test_video_level_label, testing_samples_per_video, log_file, pred_file, h5py_file, drive_manager):
17 | """
18 | Validation callback: keeps track of validation over the course of training done by keras
19 | """
20 |
21 | class ValidationCallback(tf.keras.callbacks.Callback):
22 | # def on_batch_end(self, batch, logs={}):
23 | #
24 | # metrics_log = ''
25 | # for k in self.params['metrics']:
26 | # if k in logs:
27 | # val = logs[k]
28 | # if abs(val) > 1e-3:
29 | # metrics_log += ' - %s: %.4f' % (k, val)
30 | # else:
31 | # metrics_log += ' - %s: %.4e' % (k, val)
32 | # print('{} ... {}'.format(
33 | # self.params['samples'],
34 | # metrics_log))
35 | #
36 | # print(batch)
37 | # print("="*50)
38 |
39 | def on_epoch_end(self, epoch, logs=None):
40 | """
41 | Run video-level validation and log its metrics every "validate_every" epochs,
42 | since a training epoch (frame level) is very short compared to a validation pass (video level)
43 | """
44 | global best_video_level_accuracy_1
45 | global last_video_level_loss
46 | epoch_one_based = epoch + 1
47 | log("Epoch", epoch_one_based, file=log_stream)
48 |
49 | if epoch_one_based % validate_every == 0 and epoch_one_based > 0:
50 | video_level_loss, video_level_accuracy_1, video_level_accuracy_5, test_video_level_preds = eval_model(model=model,
51 | test_loader=test_loader,
52 | test_video_level_label=test_video_level_label,
53 | testing_samples_per_video=testing_samples_per_video) # 3783*(testing_samples_per_video=19)= 71877 frames of videos
54 | if video_level_accuracy_1 > best_video_level_accuracy_1:
55 | log("Epoch", epoch_one_based, "Established new baseline:", video_level_accuracy_1, file=log_stream)
56 | best_video_level_accuracy_1 = video_level_accuracy_1
57 |
58 | # save the model and pickle
59 | #
60 | else:
61 | log("Epoch", epoch_one_based, "Baseline:", best_video_level_accuracy_1, "but got:", video_level_accuracy_1, file=log_stream)
62 |
63 | last_video_level_loss = video_level_loss
64 |
65 | log("=" * 100 + "\n(Training:)Epoch", epoch_one_based, "prec@1", logs["acc_top_1"], "prec@5", logs["acc_top_5"], "loss", logs["loss"], file=log_stream)
66 | log("(Validation:)Epoch", epoch_one_based, "prec@1", video_level_accuracy_1, "prec@5", video_level_accuracy_5, "loss", video_level_loss, file=log_stream)
67 |
68 | logs['val_loss'] = video_level_loss  # expose the video-level loss so ReduceLROnPlateau(monitor='val_loss') can react
69 |
70 | log_stream.flush()
71 | with open(pred_file, 'wb') as f:
72 | pickle.dump((dict(test_video_level_preds), testing_samples_per_video), f)
73 | model.save(h5py_file)
74 |
75 | drive_manager.upload_project_files(
76 | files_list=[log_file, pred_file, h5py_file],
77 | snapshot_name=str(epoch_one_based) + "-" + "{0:.5f}".format(best_video_level_accuracy_1) + "-" + "{0:.5f}".format(video_level_accuracy_1))
78 |
79 | else:
80 | logs['val_loss'] = last_video_level_loss  # no validation this epoch: reuse the most recent value
81 | log_stream.flush()
82 |
83 | return ValidationCallback() # returns callback instance to be consumed by keras
84 |
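
The spatial trainer above does not pass validation_data to fit_generator; instead the callback built by get_validation_callback writes logs['val_loss'] itself at the end of each epoch, which is the key ReduceLROnPlateau(monitor='val_loss') reads. The wiring sketch below works under that assumption; the import path and argument names mirror the trainers, but the function itself is illustrative, not repository code. Note that the validation callback must precede ReduceLROnPlateau in the callbacks list so that 'val_loss' already exists when the scheduler's on_epoch_end runs.

# Sketch only: wire the video-level validation callback into Keras training.
from tensorflow import keras

from utils.training_utils import get_validation_callback  # assumed import path

def fit_with_video_level_validation(keras_model, train_loader, epochs, validate_every,
                                    log_stream, test_loader, test_video_level_label,
                                    testing_samples_per_video, log_file, pred_file,
                                    h5py_file, drive_manager, workers=4):
    """Frame-level training with video-level validation, mirroring the trainers in this repository."""
    validation_cb = get_validation_callback(
        log_stream=log_stream, validate_every=validate_every, model=keras_model,
        test_loader=test_loader, test_video_level_label=test_video_level_label,
        testing_samples_per_video=testing_samples_per_video,
        log_file=log_file, pred_file=pred_file, h5py_file=h5py_file,
        drive_manager=drive_manager)

    keras_model.fit_generator(
        train_loader,
        steps_per_epoch=len(train_loader),
        epochs=epochs,
        use_multiprocessing=False, workers=workers,
        # no validation_data: validation_cb injects logs['val_loss'] at epoch end
        callbacks=[validation_cb,  # must run before ReduceLROnPlateau so 'val_loss' exists
                   keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=validate_every, verbose=1)])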
--------------------------------------------------------------------------------
/utils/zip_manager.py:
--------------------------------------------------------------------------------
1 | """
2 | ********************************
3 | * Created by mohammed-alaa *
4 | ********************************
5 | A simple class that adds a set of files and folders into a single zip file; used extensively for saving checkpoints, logs, and predictions.
6 | """
7 | import datetime
8 | import os
9 | import zipfile
10 |
11 |
12 | class ZipFile:
13 | def __init__(self, file_name):
14 | self.zipf = zipfile.ZipFile(file_name, 'w', zipfile.ZIP_DEFLATED)
15 |
16 | def get_true_size(self):
17 | size = sum([zinfo.file_size for zinfo in self.zipf.filelist])
18 | zip_mb = float(size) / 1024 / 1024  # bytes -> MB
19 | return zip_mb
20 |
21 | def get_compressed_size(self):
22 | size = sum([zinfo.compress_size for zinfo in self.zipf.filelist])
23 | zip_mb = float(size) / 1024 / 1024  # bytes -> MB
24 | return zip_mb
25 |
26 | def print_info(self, verbose=False):
27 | print("%s,total data size is :%.3f mb,compressed :%.3f mb" % (self.zipf.filename, self.get_true_size(), self.get_compressed_size()))
28 | print("Files are :")
29 | for info in self.zipf.infolist():
30 | print(info.filename)
31 | if verbose:
32 | print(' Comment :', info.comment)
33 | mod_date = datetime.datetime(*info.date_time)
34 | print(' Modified :', mod_date)
35 | if info.create_system == 0:
36 | system = 'Windows'
37 | elif info.create_system == 3:
38 | system = 'Unix'
39 | else:
40 | system = 'UNKNOWN'
41 | print(' System :', system)
42 | print(' ZIP version :', info.create_version)
43 |
44 | print(' Compressed :', info.compress_size, 'bytes')
45 | print(' Uncompressed:', info.file_size, 'bytes')
46 | print()
47 |
48 | def add_directory(self, path):
49 | for root, dirs, files in os.walk(path):
50 | for file in files:
51 | self.zipf.write(os.path.join(root, file))
52 |
53 | def add_file(self, path):
54 | self.zipf.write(path)
55 |
56 | def __del__(self):
57 | # self.print_info()
58 | self.zipf.close()
59 |
60 | # import tarfile
61 |
62 | # USAGE
63 | # myzipfile = ZipFile("comp.zip")
64 | # myzipfile.add_directory('./Bot/')
65 | # # myzipfile.add_file('./Bot/')
66 | # myzipfile.print_info()
67 | #
68 | # for root, dirs, files in os.walk('./Bot'):
69 | # print((root, dirs, files))
70 | #
71 | #
72 |
73 | #
74 | # tar = tarfile.open("TarName.tar.gz", "w:gz")
75 | # tar.add("comp.zip", arcname="comp.zip")
76 | # tar.close()
77 |
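
A hypothetical usage sketch of the ZipFile wrapper above, with placeholder archive and file names, mirroring how a checkpoint, its log, and its predictions are bundled before upload. The import path assumes the module location shown in the header.

# Sketch only: bundle training artifacts into one archive.
from utils.zip_manager import ZipFile  # assumed import path

archive = ZipFile("snapshot.zip")        # creates/overwrites the archive
archive.add_file("training.log")         # placeholder single files
archive.add_file("predictions.pickle")
archive.add_directory("./checkpoints/")  # walks the folder recursively
archive.print_info()                     # sizes reported in MB; per-file details with verbose=True
del archive                              # __del__ closes the underlying zipfile handle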
--------------------------------------------------------------------------------