├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── aggregation_modules.py ├── attention_modules.py ├── average_precision_calculator.py ├── eval.py ├── eval_util.py ├── export_model.py ├── frame_level_models.py ├── inference.py ├── losses.py ├── mean_average_precision_calculator.py ├── model_utils.py ├── models.py ├── module_utils.py ├── modules.py ├── paper └── Learnable_Pooling_Methods_for_Video_Classification.pdf ├── pathmagic.py ├── readers.py ├── rnn_modules.py ├── scripts ├── batch_evaluate.py ├── generate_gcloud_evaluation.py ├── generate_gcloud_inference.py ├── generate_gcloud_train.py └── generate_gcloud_train_valid.py ├── train.py ├── transformer_utils.py ├── utils.py ├── video_level_models.py └── video_pooling_modules.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | ## Contents sepcific to MAC OS 107 | # General 108 | .DS_Store 109 | .AppleDouble 110 | .LSOverride 111 | 112 | # Icon must end with two \r 113 | Icon 114 | 115 | 116 | # Thumbnails 117 | ._* 118 | 119 | # Files that might appear in the root of a volume 120 | .DocumentRevisions-V100 121 | .fseventsd 122 | .Spotlight-V100 123 | .TemporaryItems 124 | .Trashes 125 | .VolumeIcon.icns 126 | .com.apple.timemachine.donotpresent 127 | 128 | # Directories potentially created on remote AFP share 129 | .AppleDB 130 | .AppleDesktop 131 | Network Trash Folder 132 | Temporary Items 133 | .apdisk 134 | 135 | # Research file 136 | history/ 137 | 138 | .idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | 
Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learnable Pooling Methods for Video Classification 2 | The repository is based on the starter code provided by Google AI. It contains a code for training and evaluating models for [YouTube-8M](https://research.google.com/youtube8m/) dataset. The detailed table of contents and descriptions can be found at [original repository](https://github.com/google/youtube-8m). 3 | 4 | The repository contains models from team "Deep Topology". Our approach was accepted in [ECCV - The 2nd Workshop on YouTube-8M Large-Scale Video Understanding](https://research.google.com/youtube8m/workshop2018/index.html). The presentation is accessible in ECCV Workshop page. 5 | 6 | Presentation: TBA \ 7 | Paper: [Link](paper/Learnable_Pooling_Methods_for_Video_Classification.pdf), [Arxiv](https://arxiv.org/abs/1810.00530) 8 | 9 | # Usage 10 | In [frame_level_models.py](frame_level_models.py), prototype 1, 2 and 3 refer to sections 3.1, 3.2 and 3.2 in the paper. 
The detailed instructions to train
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Copyright 2018 Juhan Bae, Ruijian An Inc. All Rights Reserved. 16 | # 17 | # Licensed under the Apache License, Version 2.0 (the "License"); 18 | # you may not use this file except in compliance with the License. 19 | # You may obtain a copy of the License at 20 | # 21 | # http://www.apache.org/licenses/LICENSE-2.0 22 | # 23 | # Unless required by applicable law or agreed to in writing, software 24 | # distributed under the License is distributed on an "AS-IS" BASIS, 25 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 26 | # See the License for the specific language governing permissions and 27 | # limitations under the License. 28 | # noinspection PyUnresolvedReferences 29 | -------------------------------------------------------------------------------- /aggregation_modules.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Deep Topology Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
class IndirectClusterMeanPoolModule(modules.BaseModule):
    """ Mean pooling via indirect clustering.

    Per-frame weights are derived from a self-attention style similarity
    matrix over ``t_inputs`` and then applied to ``c_inputs``.
    """

    def __init__(self, l2_normalize):
        """ Initialize IndirectClusterMeanPoolModule.
        :param l2_normalize: bool; L2-normalize the pooled output if True.
        """
        self.l2_normalize = l2_normalize

    def forward(self, t_inputs, c_inputs, **unused_params):
        """ Weighted mean pooling driven by frame-to-frame similarity.
        :param t_inputs: batch_size x max_frames x num_features
        :param c_inputs: batch_size x max_frames x num_features
        :return: batch_size x num_features
        """
        # Pairwise frame similarities -> batch_size x max_frames x max_frames.
        similarity = tf.matmul(t_inputs, tf.transpose(t_inputs, perm=[0, 2, 1]))
        # Zero-out negative affinities, then collapse each row to a single
        # per-frame score -> batch_size x max_frames x 1.
        similarity = tf.nn.relu(tf.expand_dims(similarity, -1))
        scores = tf.reduce_sum(similarity, axis=2)
        # Normalize scores across frames.
        weights = tf.nn.softmax(scores, axis=1)

        # Weighted mean over the frame axis -> batch_size x num_features.
        pooled = tf.reduce_mean(tf.multiply(c_inputs, weights), axis=1)

        if self.l2_normalize:
            pooled = tf.nn.l2_normalize(pooled, 1)

        return pooled
class MeanStdPoolModule(modules.BaseModule):
    """ Mean-Std pooling method.

    NOTE(review): despite the name, forward currently returns only the
    frame-wise mean; no std component is computed or concatenated — confirm
    whether this is intentional or unfinished.
    """
    def __init__(self, l2_normalize):
        """ Initialize Mean STD module.
        :param l2_normalize: bool; currently never applied in forward.
        """
        self.l2_normalize = l2_normalize

    def forward(self, inputs, **unused_params):
        """ Forward method for MeanStdPoolModule.
        :param inputs: batch_size x max_frames x num_features
        :return: batch_size x num_features (frame-wise mean only)
        """
        # Mean over the frame axis; l2_normalize is not applied here.
        moments = tf.reduce_mean(inputs, 1)
        return moments


class IndirectClusterMaxMeanPoolModule(modules.BaseModule):
    """ Max-Mean pooling method. Mean is computed from weighted average
    inspired from self-attention mechanism (indirect clustering)
    """
    def __init__(self, l2_normalize):
        """ Initialize IndirectClusterMaxMeanPoolModule
        :param l2_normalize: bool; L2-normalize each pooled part if True.
        """
        self.l2_normalize = l2_normalize

    def forward(self, inputs, **unused_params):
        """ Forward method for max & mean pooling with indirect clustering (self-attention).
        :param inputs: batch_size x max_frames x num_features
        :return: batch_size x (2 * num_features) — concat of mean and max pools
        """
        # Pairwise frame similarities.
        attention = tf.matmul(inputs, tf.transpose(inputs, perm=[0, 2, 1]))
        # -> batch_size x max_frames x max_frames
        attention = tf.expand_dims(attention, -1)
        # Zero-out negative similarities before aggregating.
        attention = tf.nn.relu(attention)

        # Collapse each row of similarities into one score per frame.
        attention = tf.reduce_sum(attention, axis=2)
        # -> batch_size x max_frames x 1
        attention = tf.nn.softmax(attention, axis=1)

        # Attention-weighted mean alongside a plain max pool.
        mean_pool = tf.reduce_mean(tf.multiply(inputs, attention), axis=1)
        max_pool = tf.reduce_max(inputs, axis=1)
        # -> batch_size x num_features

        if self.l2_normalize:
            mean_pool = tf.nn.l2_normalize(mean_pool, 1)
            max_pool = tf.nn.l2_normalize(max_pool, 1)

        concat_pool = tf.concat([mean_pool, max_pool], 1)
        return concat_pool
""" 113 | def __init__(self, l2_normalize=True): 114 | """ Initialize MaxMeanPoolingModule. 115 | :param l2_normalize: bool 116 | """ 117 | self.l2_normalize = l2_normalize 118 | 119 | def forward(self, inputs, **unused_params): 120 | """ Forward method for mean & max pooling. 121 | :param inputs: batch_size x max_frames x num_features 122 | :return: batch_size x feature_size 123 | """ 124 | max_pooled = tf.reduce_max(inputs, 1) 125 | avg_pooled = tf.reduce_mean(inputs, 1) 126 | 127 | if self.l2_normalize: 128 | max_pooled = tf.nn.l2_normalize(max_pooled, 1) 129 | avg_pooled = tf.nn.l2_normalize(avg_pooled, 1) 130 | # -> batch_size x num_features 131 | 132 | concat = tf.concat([max_pooled, avg_pooled], 1) 133 | return concat 134 | 135 | 136 | class MaxPoolingModule(modules.BaseModule): 137 | """ Max pooling method. """ 138 | def __init__(self, l2_normalize=False): 139 | """ Initialize MaxPoolingModule. 140 | :param l2_normalize: bool 141 | """ 142 | self.l2_normalize = l2_normalize 143 | 144 | def forward(self, inputs, **unused_params): 145 | """ Forward method for max pooling. 146 | :param inputs: batch_size x max_frames x num_features 147 | :return: batch_size x feature_size 148 | """ 149 | return tf.reduce_max(inputs, 1) 150 | 151 | 152 | class MeanPooling(modules.BaseModule): 153 | """ Average pooling method. """ 154 | def __init__(self, l2_normalize=False): 155 | """ Initialize MeanPooling. 156 | :param l2_normalize: bool 157 | """ 158 | self.l2_normalize = l2_normalize 159 | 160 | def forward(self, inputs, **unused_params): 161 | """ Forward method for mean pooling. 162 | :param inputs: batch_size x max_frames x num_features 163 | :return: batch_size x feature_size 164 | """ 165 | return tf.reduce_mean(inputs, 1) 166 | 167 | 168 | class GemPoolingModule(modules.BaseModule): 169 | """ Generalized Mean Pooling. """ 170 | def __init__(self, l2_normalize=False, eps=1e-6): 171 | """ Initialize GemPoolingModule. 
class GemPoolingModule(modules.BaseModule):
    """ Generalized Mean Pooling.

    GeM interpolates between mean pooling (p=1) and max pooling (p->inf)
    via a learnable exponent p.
    """
    def __init__(self, l2_normalize=False, eps=1e-6):
        """ Initialize GemPoolingModule.
        :param l2_normalize: bool; NOTE(review): never applied in forward.
        :param eps: float; lower clip bound so tf.pow sees positive inputs.
        """
        self.l2_normalize = l2_normalize
        self.eps = eps

    # TODO: Implementation is incorrect / incomplete.
    def forward(self, inputs, **unused_params):
        """ Forward method for GeM pooling
        :param inputs: batch_size x max_frames x num_features
        :return: batch_size x feature_size
        """
        # Learnable exponent. NOTE(review): no initializer is supplied, so the
        # framework default is used — GeM implementations usually initialize
        # p around 3; confirm intended behavior.
        p = tf.get_variable("p",
                            shape=[1])
        # Clip some values.
        # NOTE(review): clip_value_max=None may not be accepted by
        # tf.clip_by_value — likely part of why this is marked incorrect.
        frames = tf.clip_by_value(inputs, clip_value_min=self.eps, clip_value_max=None)
        # GeM: mean of p-th powers, then the p-th root.
        frames = tf.pow(frames, p)
        frames = tf.reduce_mean(frames, 1)
        frames = tf.pow(frames, 1. / p)
        return frames
class OneFcAttention(modules.BaseModule):
    """ Attention pooling with a single fully-connected projection.

    Projects frame features to per-cluster attention logits, softmaxes over
    frames, and pools frames into `num_cluster` attention-weighted vectors.
    """

    def __init__(self, num_features, num_frames, num_cluster, do_shift=True):
        """ Initialize OneFcAttention.
        :param num_features: per-frame feature dimensionality
        :param num_frames: frames per video
        :param num_cluster: number of attention clusters
        :param do_shift: bool; apply learned scale/shift + L2 normalization
        """
        self.num_feature = num_features
        self.num_frames = num_frames
        self.num_cluster = num_cluster
        self.do_shift = do_shift

    def forward(self, inputs, **unused_params):
        """ Forward method for OneFcAttention.
        :param inputs: (batch_size * num_frames) x num_features
        :return: batch_size x (num_cluster * num_features)
        """
        attention_weights = \
            tf.get_variable("one_fc_attention_weight",
                            [self.num_feature, self.num_cluster],
                            initializer=tf.contrib.layers.xavier_initializer())
        attention = tf.matmul(inputs, attention_weights)
        attention = tf.reshape(attention, [-1, self.num_frames, self.num_cluster])
        # Scale logits by 1/sqrt(d), dot-product-attention style.
        attention = tf.scalar_mul(1 / math.sqrt(self.num_feature), attention)
        # Fix: use `axis` instead of the deprecated `dim` keyword; identical
        # behavior, and consistent with tf.nn.softmax usage elsewhere in
        # this project.
        attention = tf.nn.softmax(attention, axis=1)

        reshaped_inputs = tf.reshape(inputs, [-1, self.num_frames, self.num_feature])
        activation = tf.transpose(attention, perm=[0, 2, 1])
        activation = tf.matmul(activation, reshaped_inputs)
        # -> batch_size x num_cluster x feature_size

        reshaped_activation = tf.reshape(activation, [-1, self.num_feature])

        if self.do_shift:
            # Learned affine transform followed by L2 normalization and a
            # 1/sqrt(num_cluster) rescale.
            alpha = \
                tf.get_variable("alpha",
                                [1],
                                initializer=tf.constant_initializer(1))
            beta = \
                tf.get_variable("beta",
                                [1],
                                initializer=tf.constant_initializer(0.01))

            reshaped_activation = alpha * reshaped_activation
            reshaped_activation = reshaped_activation + beta
            reshaped_activation = tf.nn.l2_normalize(reshaped_activation, 1)
            reshaped_activation = tf.scalar_mul(1 / math.sqrt(self.num_cluster), reshaped_activation)

        activation = tf.reshape(reshaped_activation, [-1, self.num_cluster * self.num_feature])

        return activation
class MultiHeadAttention(modules.BaseModule):
    """ Multi-head scaled dot-product self-attention over frames. """

    def __init__(self, num_heads, num_units,
                 max_frames, block_id):
        """ Initialize MultiHeadAttention
        :param num_heads: Number of self-attention modules
        :param num_units: last dimension of Q, K, V
        :param max_frames: frames per video
        :param block_id: id used to give each encoder block its own scope
        """
        self.num_heads = num_heads
        self.num_units = num_units
        self.max_frames = max_frames
        self.block_id = block_id

    def self_attention(self, inputs, scope_id):
        """ One attention head.
        :param inputs: (batch_size * max_frames) x feature_size
        :param scope_id: int; distinguishes per-head variable scopes
        :return: batch_size x max_frames x num_units
        """
        with tf.variable_scope("Block{}Layer{}".format(self.block_id, scope_id), reuse=tf.AUTO_REUSE):
            # Calculate query, key, value pair
            Q = tf.layers.dense(inputs, self.num_units, activation=tf.nn.relu)
            K = tf.layers.dense(inputs, self.num_units, activation=tf.nn.relu)
            V = tf.layers.dense(inputs, self.num_units, activation=tf.nn.relu)
            # Q, K, V: -> (batch_size * max_frames) x num_units

            # Reshape for self-attention calculation
            Q = tf.reshape(Q, [-1, self.max_frames, self.num_units])
            K = tf.reshape(K, [-1, self.max_frames, self.num_units])
            V = tf.reshape(V, [-1, self.max_frames, self.num_units])
            # Q, K, V: -> batch_size x max_frames x num_units

            # Self-attention
            attention = tf.matmul(Q, tf.transpose(K, perm=[0, 2, 1]))
            # attention: -> batch_size x max_frames x max_frames
            float_cpy = tf.cast(self.num_units, dtype=tf.float32)
            # Bug fix: the logits were previously divided by sqrt(num_units)
            # twice (once before the softmax call and once inside it), an
            # effective 1/num_units scale. Scaled dot-product attention
            # divides exactly once.
            attention = tf.nn.softmax(tf.divide(attention, tf.sqrt(float_cpy)))

            output = tf.matmul(attention, V)
            # output: -> batch_size x max_frames x num_units
            return output

    def forward(self, inputs, **unused_params):
        """ Run all heads and concatenate along the feature axis.
        :param inputs: (batch_size * max_frames) x feature_size
        :return: batch_size x max_frames x (num_units * num_heads)
        """
        result = self.self_attention(inputs, scope_id=0)
        for i in range(1, self.num_heads):
            # (A former `result = tf.identity(result)` here was a no-op
            # and has been removed.)
            output = self.self_attention(inputs, scope_id=i)
            result = tf.concat([result, output], 2)
        # result: -> batch_size x max_frames x (num_units * num_heads)
        return result
class TransformerEncoderBlock(modules.BaseModule):
    """ One Transformer encoder block: multi-head self-attention followed by a
    two-layer 1x1-convolution feed-forward sub-layer. """

    def __init__(self,
                 is_training, num_units, max_frames, feature_size, num_heads, block_id):
        """ Initialize Transformer Encoder block

        :param is_training: bool
        :param num_units: Number of hidden units of fully connected layers
        :param max_frames: frames per video
        :param feature_size: dimensionality of the per-frame input features
        :param num_heads: number of attention heads
        :param block_id: scope id so stacked blocks get distinct variables
        """
        self.is_training = is_training
        self.num_units = num_units
        self.max_frames = max_frames
        self.feature_size = feature_size
        self.num_heads = num_heads
        self.block_id = block_id

    def forward(self, inputs, **unused_params):
        """ One block of encoder containing one self-attention layer and one fully connected layer.

        :param inputs: (batch_size * max_frames) x feature_size
        :return: (batch_size * max_frames) x feature_size
            NOTE(review): the final reshape assumes the last conv output width
            (num_units) equals feature_size — confirm num_units == feature_size
            at all call sites.
        """
        multi_head_layer = MultiHeadAttention(self.num_heads, self.num_units, self.max_frames, self.block_id)

        attention_output = multi_head_layer.forward(inputs)
        # output: -> batch_size x max_frames x (num_units * num_heads)

        attention_output = tf.reshape(attention_output, [-1, self.num_units * self.num_heads])
        # output: -> (batch_size * max_frames) x (num_units * num_heads)

        # Project the concatenated heads back to the input feature size so
        # the residual addition below is shape-compatible.
        attention_output = tf.layers.dense(attention_output, self.feature_size, activation=tf.nn.relu)
        # output: -> (batch_size * max_frames) x feature_size

        # Residual connection & Layer normalization
        attention_output += inputs
        attention_output = tf.contrib.layers.layer_norm(attention_output)

        # 2 layers of 1 x 1 convolution
        output = tf.reshape(attention_output, [-1, self.max_frames, self.feature_size])
        output = tf.layers.conv1d(output, filters=4 * self.num_units, kernel_size=1, activation=tf.nn.relu,
                                  use_bias=True)
        output = tf.layers.conv1d(output, filters=self.num_units, kernel_size=1, activation=None, use_bias=True)

        # Residual connection & Layer normalization
        # NOTE(review): no residual is actually added around this conv
        # sub-layer before the layer norm — this deviates from the standard
        # Transformer encoder; confirm whether intentional.
        output = tf.contrib.layers.layer_norm(output)
        output = tf.reshape(output, [-1, self.feature_size])

        return output
vocab_size, is_training, scope_id=None): 159 | """ Initialize class PnGateModule. 160 | :param vocab_size: int 161 | Size of the classes. 162 | :param is_training: bool 163 | True iff the model is being trained. 164 | :param scope_id: Object 165 | """ 166 | self.vocab_size = vocab_size 167 | self.scope_id = scope_id 168 | self.is_training = is_training 169 | 170 | def forward(self, inputs, **unused_params): 171 | """ PN Gate for correlation learning. 172 | vocabularies -> P gate -> N gate -> output 173 | :param inputs: batch_size x vocab_size 174 | :return: batch_size x vocab_size 175 | """ 176 | p_gating_weights = \ 177 | tf.get_variable("p_pn_gate", 178 | [self.vocab_size, self.vocab_size], 179 | initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.vocab_size))) 180 | n_gating_weights = \ 181 | tf.get_variable("n_pn_gate", 182 | [self.vocab_size, self.vocab_size], 183 | initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.vocab_size))) 184 | 185 | # batch_size x vocab_size, vocab_size x vocab_size --> batch_size x vocab_size 186 | p_activation = tf.matmul(inputs, p_gating_weights) 187 | p_activation = tf.nn.relu6(p_activation) 188 | p_gate = inputs + p_activation 189 | 190 | # batch_size x vocab_size, vocab_size x vocab_size --> batch_size x vocab_size 191 | n_activation = tf.matmul(p_gate, n_gating_weights) 192 | n_activation = -1 * n_activation 193 | n_activation = tf.nn.relu6(n_activation) 194 | n_gate = p_gate + (-1 * n_activation) 195 | 196 | output = tf.nn.softmax(n_gate) 197 | return output 198 | 199 | 200 | class NpGateModule(modules.BaseModule): 201 | def __init__(self, vocab_size, is_training, scope_id=None): 202 | """ Initialize class NpGateModule. 203 | :param vocab_size: int 204 | Size of the classes. 205 | :param is_training: bool 206 | True iff the model is being trained. 
207 | :param scope_id: Object 208 | """ 209 | self.vocab_size = vocab_size 210 | self.scope_id = scope_id 211 | self.is_training = is_training 212 | 213 | def forward(self, inputs, **unused_params): 214 | """ PN Gate for correlation learning. 215 | vocabularies -> N gate -> P gate -> output 216 | :param inputs: batch_size x vocab_size 217 | :return: batch_size x vocab_size 218 | """ 219 | p_gating_weights = \ 220 | tf.get_variable("p_np_gate", 221 | [self.vocab_size, self.vocab_size], 222 | initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.vocab_size))) 223 | n_gating_weights = \ 224 | tf.get_variable("n_np_gate", 225 | [self.vocab_size, self.vocab_size], 226 | initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.vocab_size))) 227 | 228 | # batch_size x vocab_size, vocab_size x vocab_size --> batch_size x vocab_size 229 | n_activation = tf.matmul(inputs, n_gating_weights) 230 | n_activation = -1 * n_activation 231 | n_activation = tf.nn.relu6(n_activation) 232 | n_gate = inputs + (-1 * n_activation) 233 | 234 | # batch_size x vocab_size, vocab_size x vocab_size --> batch_size x vocab_size 235 | p_activation = tf.matmul(n_gate, p_gating_weights) 236 | p_activation = tf.nn.relu6(p_activation) 237 | p_gate = n_gate + p_activation 238 | output = tf.nn.softmax(p_gate) 239 | 240 | return output 241 | 242 | 243 | class PGateModule(modules.BaseModule): 244 | def __init__(self, vocab_size, is_training, scope_id=None): 245 | """ Initialize class PGateModule. 246 | :param vocab_size: int 247 | Size of the classes. 248 | :param is_training: bool 249 | True iff the model is being trained. 250 | :param scope_id: Object 251 | """ 252 | self.vocab_size = vocab_size 253 | self.scope_id = scope_id 254 | self.is_training = is_training 255 | 256 | def forward(self, inputs, **unused_params): 257 | """ PN Gate for correlation learning. 
258 | vocabularies -> P gate -> output 259 | :param inputs: batch_size x vocab_size 260 | :return: batch_size x vocab_size 261 | """ 262 | p_gating_weights = \ 263 | tf.get_variable("p_p_gate", 264 | [self.vocab_size, self.vocab_size], 265 | initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.vocab_size))) 266 | 267 | # batch_size x vocab_size, vocab_size x vocab_size --> batch_size x vocab_size 268 | p_activation = tf.matmul(inputs, p_gating_weights) 269 | p_activation = tf.nn.relu6(p_activation) 270 | p_gate = inputs + p_activation 271 | output = tf.nn.softmax(p_gate) 272 | 273 | return output 274 | 275 | 276 | class CorNNGateModule(modules.BaseModule): 277 | def __init__(self, vocab_size, is_training, batch_norm=True, scope_id=None): 278 | """ Initialize a class CorNNGateModule. 279 | :param vocab_size: int 280 | Size of the classes. 281 | :param is_training: bool 282 | :param batch_norm: bool 283 | :param scope_id: int 284 | """ 285 | self.vocab_size = vocab_size 286 | self.is_training = is_training 287 | self.batch_norm = batch_norm 288 | self.scope_id = scope_id 289 | 290 | def forward(self, inputs, **unused_params): 291 | """ Forward function of CorNNGateModule. 
292 | :param inputs: batch_size x vocab_size 293 | :return: batch_size x vocab_size 294 | """ 295 | fc1_out = slim.fully_connected( 296 | inputs=inputs, 297 | num_outputs=self.vocab_size, 298 | activation_fn=nn.relu, 299 | scope="vocab_gate1_v1{}".format("" if self.scope_id is None else str(self.scope_id)) 300 | ) 301 | 302 | fc2_out = slim.fully_connected( 303 | inputs=fc1_out, 304 | num_outputs=self.vocab_size, 305 | activation_fn=nn.relu, 306 | scope="vocab_gate2_v1{}".format("" if self.scope_id is None else str(self.scope_id)) 307 | ) 308 | 309 | fc3_out = slim.fully_connected( 310 | inputs=fc2_out, 311 | num_outputs=self.vocab_size, 312 | activation_fn=nn.sigmoid, 313 | scope="vocab_gate3_v1{}".format("" if self.scope_id is None else str(self.scope_id)) 314 | ) 315 | 316 | return fc3_out 317 | 318 | 319 | class ContextGateV1(modules.BaseModule): 320 | """ 321 | Given the weight W, calculate sigmoid(WX + b) o X. o is an element-wise 322 | multiplication. 323 | """ 324 | def __init__(self, vocab_size, is_training, batch_norm=True, scope_id=None): 325 | """ Initialize a class ContextGateV1. The idea and implementation is adopted from WILLOW. 326 | :param vocab_size: int 327 | Size of the classes. 
328 | :param is_training: bool 329 | :param batch_norm: bool 330 | :param scope_id: int 331 | """ 332 | self.vocab_size = vocab_size 333 | self.is_training = is_training 334 | self.batch_norm = batch_norm 335 | self.scope_id = scope_id 336 | 337 | def forward(self, inputs, **unused_params): 338 | """ Forward function of ContextGateV1 339 | :param inputs: batch_size x vocab_size 340 | :return: batch_size x vocab_size 341 | """ 342 | gating_weights = tf.get_variable("vocab_gate_v1{}".format("" if self.scope_id is None else str(self.scope_id)), 343 | [self.vocab_size, self.vocab_size]) 344 | 345 | # batch_size x vocab_size, vocab_size x vocab_size --> batch_size x vocab_size 346 | gates = tf.matmul(inputs, gating_weights) 347 | 348 | if self.batch_norm: 349 | gates = slim.batch_norm( 350 | gates, 351 | center=True, 352 | scale=True, 353 | is_training=self.is_training, 354 | scope="vocab_gate_bn_v1{}".format("" if self.scope_id is None else str(self.scope_id))) 355 | 356 | gates = tf.sigmoid(gates) 357 | 358 | # batch_size x vocab_size, batch_size x vocab_size -> batch_size x vocab_size 359 | updated_inputs = tf.multiply(inputs, gates) 360 | 361 | # batch_size x vocab_size 362 | return updated_inputs 363 | -------------------------------------------------------------------------------- /average_precision_calculator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

"""Calculate or keep track of the interpolated average precision.

It provides an interface for calculating interpolated average precision for an
entire list or the top-n ranked items. For the definition of the
(non-)interpolated average precision:
http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf

Example usages:
1) Use it as a static function call to directly calculate average precision for
a short ranked list in the memory.

```
import random
import numpy as np

p = np.array([random.random() for _ in range(10)])
a = np.array([random.choice([0, 1]) for _ in range(10)])

ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a)
```

2) Use it as an object for long ranked list that cannot be stored in memory or
the case where partial predictions can be observed at a time (Tensorflow
predictions). In this case, we first call the function accumulate many times
to process parts of the ranked list. After processing all the parts, we call
peek_interpolated_ap_at_n.

```
p1 = np.array([random.random() for _ in range(5)])
a1 = np.array([random.choice([0, 1]) for _ in range(5)])
p2 = np.array([random.random() for _ in range(5)])
a2 = np.array([random.choice([0, 1]) for _ in range(5)])

# interpolated average precision at 10 using 1000 break points
calculator = average_precision_calculator.AveragePrecisionCalculator(10)
calculator.accumulate(p1, a1)
calculator.accumulate(p2, a2)
ap3 = calculator.peek_ap_at_n()
```
"""
# pathmagic only adjusts sys.path for intra-project imports; make it optional
# so this module can also be imported standalone (e.g. from unit tests).
try:
    # noinspection PyUnresolvedReferences
    import pathmagic
except ImportError:
    pass
import heapq
import random
import numbers
import numpy


class AveragePrecisionCalculator(object):
    """Calculate the average precision and average precision at n."""

    def __init__(self, top_n=None):
        """Construct an AveragePrecisionCalculator to calculate average precision.

        This class is used to calculate the average precision for a single label.

        Args:
          top_n: A positive Integer specifying the average precision at n, or
            None to use all provided data points.

        Raises:
          ValueError: An error occurred when the top_n is not a positive integer.
        """
        # BUG FIX: the original check used `top_n >= 0`, admitting top_n == 0,
        # which later crashes accumulate() with an IndexError on an empty
        # heap. Reject it here, as the error message already promises.
        if not ((isinstance(top_n, int) and top_n > 0) or top_n is None):
            raise ValueError("top_n must be a positive integer or None.")

        self._top_n = top_n  # average precision at n
        self._total_positives = 0  # total number of positives have seen
        # heapq maintains a MIN-heap, so the smallest retained prediction is
        # at index 0 and gets evicted first once the heap is full.
        self._heap = []  # min-heap of (prediction, actual)

    @property
    def heap_size(self):
        """Gets the heap size maintained in the class."""
        return len(self._heap)

    @property
    def num_accumulated_positives(self):
        """Gets the number of positive samples that have been accumulated."""
        return self._total_positives

    def accumulate(self, predictions, actuals, num_positives=None):
        """Accumulate the predictions and their ground truth labels.

        After the function call, we may call peek_ap_at_n to actually calculate
        the average precision.
        Note predictions and actuals must have the same shape.

        Args:
          predictions: a list storing the prediction scores.
          actuals: a list storing the ground truth labels. Any value
            larger than 0 will be treated as positives, otherwise as negatives.
          num_positives: If the 'predictions' and 'actuals' inputs aren't complete,
            then it's possible some true positives were missed in them. In that case,
            you can provide 'num_positives' in order to accurately track recall.

        Raises:
          ValueError: An error occurred when the format of the input is not the
            numpy 1-D array or the shape of predictions and actuals does not match.
        """
        if len(predictions) != len(actuals):
            raise ValueError("the shape of predictions and actuals does not match.")

        if num_positives is not None:
            if not isinstance(num_positives, numbers.Number) or num_positives < 0:
                # (typo fix: the message previously read "wan't")
                raise ValueError("'num_positives' was provided but it wasn't a nonzero number.")
            self._total_positives += num_positives
        else:
            self._total_positives += numpy.size(numpy.where(actuals > 0))

        topk = self._top_n
        heap = self._heap

        for i in range(numpy.size(predictions)):
            if topk is None or len(heap) < topk:
                heapq.heappush(heap, (predictions[i], actuals[i]))
            elif predictions[i] > heap[0][0]:  # heap[0] is the smallest
                # pop-then-push, identical to the original heappop + heappush
                heapq.heapreplace(heap, (predictions[i], actuals[i]))

    def clear(self):
        """Clear the accumulated predictions."""
        self._heap = []
        self._total_positives = 0

    def peek_ap_at_n(self):
        """Peek the non-interpolated average precision at n.

        Returns:
          The non-interpolated average precision at n (default 0).
          If n is larger than the length of the ranked list,
          the average precision will be returned.
        """
        if self.heap_size <= 0:
            return 0
        predlists = numpy.array(list(zip(*self._heap)))

        ap = self.ap_at_n(predlists[0],
                          predlists[1],
                          n=self._top_n,
                          total_num_positives=self._total_positives)
        return ap

    @staticmethod
    def ap(predictions, actuals):
        """Calculate the non-interpolated average precision.

        Args:
          predictions: a numpy 1-D array storing the sparse prediction scores.
          actuals: a numpy 1-D array storing the ground truth labels. Any value
            larger than 0 will be treated as positives, otherwise as negatives.

        Returns:
          The non-interpolated average precision at n.
          If n is larger than the length of the ranked list,
          the average precision will be returned.

        Raises:
          ValueError: An error occurred when the format of the input is not the
            numpy 1-D array or the shape of predictions and actuals does not match.
        """
        return AveragePrecisionCalculator.ap_at_n(predictions,
                                                  actuals,
                                                  n=None)

    @staticmethod
    def ap_at_n(predictions, actuals, n=20, total_num_positives=None):
        """Calculate the non-interpolated average precision.

        Args:
          predictions: a numpy 1-D array storing the sparse prediction scores.
          actuals: a numpy 1-D array storing the ground truth labels. Any value
            larger than 0 will be treated as positives, otherwise as negatives.
          n: the top n items to be considered in ap@n.
          total_num_positives: (optionally) you can specify the number of total
            positives in the list. If specified, it will be used in calculation.

        Returns:
          The non-interpolated average precision at n.
          If n is larger than the length of the ranked list,
          the average precision will be returned.

        Raises:
          ValueError: An error occurred when
            1) the format of the input is not the numpy 1-D array;
            2) the shape of predictions and actuals does not match;
            3) the input n is not a positive integer.
        """
        if len(predictions) != len(actuals):
            raise ValueError("the shape of predictions and actuals does not match.")

        if n is not None:
            if not isinstance(n, int) or n <= 0:
                raise ValueError("n must be 'None' or a positive integer."
                                 " It was '%s'." % n)

        ap = 0.0

        predictions = numpy.array(predictions)
        actuals = numpy.array(actuals)

        # add a shuffler to avoid overestimating the ap on ties
        predictions, actuals = AveragePrecisionCalculator._shuffle(predictions,
                                                                   actuals)
        sortidx = sorted(
            range(len(predictions)),
            key=lambda k: predictions[k],
            reverse=True)

        if total_num_positives is None:
            numpos = numpy.size(numpy.where(actuals > 0))
        else:
            numpos = total_num_positives

        if numpos == 0:
            return 0

        if n is not None:
            numpos = min(numpos, n)
        delta_recall = 1.0 / numpos
        poscount = 0.0

        # calculate the ap: each hit contributes precision@i * delta_recall
        r = len(sortidx)
        if n is not None:
            r = min(r, n)
        for i in range(r):
            if actuals[sortidx[i]] > 0:
                poscount += 1
                ap += poscount / (i + 1) * delta_recall
        return ap

    @staticmethod
    def _shuffle(predictions, actuals):
        # NOTE(review): re-seeds the *global* random module on every call so
        # results are reproducible, but this also affects other users of
        # `random` in the process.
        random.seed(0)
        suffidx = random.sample(range(len(predictions)), len(predictions))
        predictions = predictions[suffidx]
        actuals = actuals[suffidx]
        return predictions, actuals

    @staticmethod
    def _zero_one_normalize(predictions, epsilon=1e-7):
        """Normalize the predictions to the range between 0.0 and 1.0.

        For some predictions like SVM predictions, we need to normalize them before
        calculate the interpolated average precision. The normalization will not
        change the rank in the original list and thus won't change the average
        precision.

        Args:
          predictions: a numpy 1-D array storing the sparse prediction scores.
          epsilon: a small constant to avoid denominator being zero.

        Returns:
          The normalized prediction.
        """
        denominator = numpy.max(predictions) - numpy.min(predictions)
        # BUG FIX: this previously called numpy.max(denominator, epsilon),
        # which passes epsilon as the `axis` argument instead of taking the
        # larger of the two scalars. Use the builtin max.
        ret = (predictions - numpy.min(predictions)) / max(denominator, epsilon)
        return ret


# ---------------------------------------------------------------------------
# eval.py
# ---------------------------------------------------------------------------
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Binary for evaluating Tensorflow models on the YouTube-8M dataset."""
import os
import sys

# Explicitly add the file's directory to the path list.
file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
sys.path.append(os.path.join(os.getcwd(), "modules"))

# noinspection PyUnresolvedReferences
import pathmagic
import glob
import json
import os
import time
import sys
import eval_util
import losses
import video_level_models
import frame_level_models
import readers
import tensorflow as tf
from tensorflow.python.lib.io import file_io
from tensorflow import app
from tensorflow import flags
from tensorflow import gfile
from tensorflow import logging
import utils


FLAGS = flags.FLAGS

if __name__ == "__main__":
    # Data set flags.
    flags.DEFINE_string("train_dir", "/tmp/yt8m_model/",
                        "The directory to load the model files from. "
                        "The tensorboard metrics files are also saved to this "
                        "directory.")
    flags.DEFINE_string(
        "eval_data_pattern", "",
        "File glob defining the evaluation dataset in tensorflow.SequenceExample "
        "format. The SequenceExamples are expected to have an 'rgb' byte array "
        "sequence feature as well as a 'labels' int64 context feature.")

    # Other flags.
    flags.DEFINE_integer("batch_size", 1024,
                         "How many examples to process per batch.")
    flags.DEFINE_integer("num_readers", 8,
                         "How many threads to use for reading input files.")
    flags.DEFINE_boolean("run_once", False, "Whether to run eval only once.")
    flags.DEFINE_integer("top_k", 20, "How many predictions to output per video.")


def find_class_by_name(name, modules):
    """Searches the provided modules for the named class and returns it.

    NOTE(review): raises StopIteration (not a friendly error) if no module
    defines `name`.
    """
    modules = [getattr(module, name, None) for module in modules]
    return next(a for a in modules if a)


def get_input_evaluation_tensors(reader,
                                 data_pattern,
                                 batch_size=1024,
                                 num_readers=1):
    """Creates the section of the graph which reads the evaluation data.

    Args:
      reader: A class which parses the training data.
      data_pattern: A 'glob' style path to the data files.
      batch_size: How many examples to process at a time.
      num_readers: How many I/O threads to use.

    Returns:
      A tuple containing the features tensor, labels tensor, and optionally a
      tensor containing the number of frames per video. The exact dimensions
      depend on the reader being used.

    Raises:
      IOError: If no files matching the given pattern were found.
    """
    logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
    with tf.name_scope("eval_input"):
        files = gfile.Glob(data_pattern)
        if not files:
            raise IOError("Unable to find the evaluation files.")
        logging.info("number of evaluation files: " + str(len(files)))
        # A single epoch, unshuffled, so every example is evaluated exactly once.
        filename_queue = tf.train.string_input_producer(
            files, shuffle=False, num_epochs=1)
        eval_data = [
            reader.prepare_reader(filename_queue) for _ in range(num_readers)
        ]
        return tf.train.batch_join(
            eval_data,
            batch_size=batch_size,
            capacity=3 * batch_size,
            allow_smaller_final_batch=True,
            enqueue_many=True)


def build_graph(reader,
                model,
                eval_data_pattern,
                label_loss_fn,
                batch_size=1024,
                num_readers=1):
    """Creates the Tensorflow graph for evaluation.

    Args:
      reader: The data file reader. It should inherit from BaseReader.
      model: The core model (e.g. logistic or neural net). It should inherit
             from BaseModel.
      eval_data_pattern: glob path to the evaluation data files.
      label_loss_fn: What kind of loss to apply to the model. It should inherit
                  from BaseLoss.
      batch_size: How many examples to process at a time.
      num_readers: How many threads to use for I/O operations.
    """

    global_step = tf.Variable(0, trainable=False, name="global_step")
    video_id_batch, model_input_raw, labels_batch, num_frames = get_input_evaluation_tensors(  # pylint: disable=g-line-too-long
        reader,
        eval_data_pattern,
        batch_size=batch_size,
        num_readers=num_readers)
    tf.summary.histogram("model_input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1

    # Normalize input features.
    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    with tf.variable_scope("tower"):
        result = model.create_model(model_input,
                                    num_frames=num_frames,
                                    vocab_size=reader.num_classes,
                                    labels=labels_batch,
                                    is_training=False)
        predictions = result["predictions"]
        tf.summary.histogram("model_activations", predictions)
        # Models may compute their own loss; fall back to the provided loss fn.
        if "loss" in result.keys():
            label_loss = result["loss"]
        else:
            label_loss = label_loss_fn.calculate_loss(predictions, labels_batch)

    # Expose the eval tensors through collections so evaluate() can retrieve
    # them without holding direct references.
    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", predictions)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("video_id_batch", video_id_batch)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("summary_op", tf.summary.merge_all())


def get_latest_checkpoint():
    """Returns the checkpoint prefix with the largest global step in
    FLAGS.train_dir, or None if no checkpoint index files exist."""
    index_files = file_io.get_matching_files(os.path.join(FLAGS.train_dir, 'model.ckpt-*.index'))
    tf.logging.debug("Looking at {}".format(index_files))

    # No files
    if not index_files:
        return None

    # Index file path with the maximum step size.
    latest_index_file = sorted(
        [(int(os.path.basename(f).split("-")[-1].split(".")[0]), f)
         for f in index_files])[-1][1]

    # Chop off the ".index" suffix (6 characters) and return.
    return latest_index_file[:-6]


def evaluation_loop(video_id_batch, prediction_batch, label_batch, loss,
                    summary_op, saver, summary_writer, evl_metrics,
                    last_global_step_val):
    """Run the evaluation loop once.

    Args:
      video_id_batch: a tensor of video ids mini-batch.
      prediction_batch: a tensor of predictions mini-batch.
      label_batch: a tensor of label_batch mini-batch.
      loss: a tensor of loss for the examples in the mini-batch.
      summary_op: a tensor which runs the tensorboard summary operations.
      saver: a tensorflow saver to restore the model.
      summary_writer: a tensorflow summary_writer
      evl_metrics: an EvaluationMetrics object.
      last_global_step_val: the global step used in the previous evaluation.

    Returns:
      The global_step used in the latest model.
    """

    global_step_val = -1
    with tf.Session() as sess:
        latest_checkpoint = get_latest_checkpoint()
        print(latest_checkpoint)
        if latest_checkpoint:
            logging.info("Loading checkpoint for eval: " + latest_checkpoint)
            # Restores from checkpoint
            saver.restore(sess, latest_checkpoint)
            # Assuming model_checkpoint_path looks something like:
            # /my-favorite-path/yt8m_train/model.ckpt-0, extract global_step from it.
            global_step_val = os.path.basename(latest_checkpoint).split("-")[-1]

            # Save model
            saver.save(sess, os.path.join(FLAGS.train_dir, "inference_model"))
        else:
            logging.info("No checkpoint file found.")
            return global_step_val

        if global_step_val == last_global_step_val:
            logging.info("skip this checkpoint global_step_val=%s "
                         "(same as the previous one).", global_step_val)
            return global_step_val

        sess.run([tf.local_variables_initializer()])

        # Start the queue runners.
        fetches = [video_id_batch, prediction_batch, label_batch, loss, summary_op]
        coord = tf.train.Coordinator()
        # BUG FIX: summary_val must exist before the loop; otherwise the
        # OutOfRangeError handler hits an UnboundLocalError when the
        # evaluation set yields zero batches.
        summary_val = None
        try:
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(
                    sess, coord=coord, daemon=True,
                    start=True))
            logging.info("enter eval_once loop global_step_val = %s. ",
                         global_step_val)

            evl_metrics.clear()

            examples_processed = 0
            while not coord.should_stop():
                batch_start_time = time.time()
                _, predictions_val, labels_val, loss_val, summary_val = sess.run(
                    fetches)
                seconds_per_batch = time.time() - batch_start_time
                example_per_second = labels_val.shape[0] / seconds_per_batch
                examples_processed += labels_val.shape[0]

                iteration_info_dict = evl_metrics.accumulate(predictions_val,
                                                             labels_val, loss_val)
                iteration_info_dict["examples_per_second"] = example_per_second

                iterinfo = utils.AddGlobalStepSummary(
                    summary_writer,
                    global_step_val,
                    iteration_info_dict,
                    summary_scope="Eval")
                logging.info("examples_processed: %d | %s", examples_processed,
                             iterinfo)

        except tf.errors.OutOfRangeError as e:
            # Raised by the input pipeline when the single eval epoch is done.
            logging.info(
                "Done with batched inference. Now calculating global performance "
                "metrics.")
            # calculate the metrics for the entire epoch
            epoch_info_dict = evl_metrics.get()
            epoch_info_dict["epoch_id"] = global_step_val

            if summary_val is not None:
                summary_writer.add_summary(summary_val, global_step_val)
            epochinfo = utils.AddEpochSummary(
                summary_writer,
                global_step_val,
                epoch_info_dict,
                summary_scope="Eval")
            logging.info(epochinfo)
            evl_metrics.clear()
        except Exception as e:  # pylint: disable=broad-except
            logging.info("Unexpected exception: " + str(e))
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)

        return global_step_val


def evaluate():
    """Builds the eval graph from the saved training flags and loops,
    evaluating each new checkpoint found in FLAGS.train_dir."""
    tf.set_random_seed(0)  # for reproducibility

    # Write json of flags
    model_flags_path = os.path.join(FLAGS.train_dir, "model_flags.json")
    if not file_io.file_exists(model_flags_path):
        raise IOError(("Cannot find file %s. Did you run train.py on the same "
                       "--train_dir?") % model_flags_path)
    flags_dict = json.loads(file_io.FileIO(model_flags_path, mode="r").read())

    with tf.Graph().as_default():
        # convert feature_names and feature_sizes to lists of values
        feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
            flags_dict["feature_names"], flags_dict["feature_sizes"])

        if flags_dict["frame_features"]:
            reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                                    feature_sizes=feature_sizes)
        else:
            reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
                                                         feature_sizes=feature_sizes)

        model = find_class_by_name(flags_dict["model"],
                                   [frame_level_models, video_level_models])()
        label_loss_fn = find_class_by_name(flags_dict["label_loss"], [losses])()

        # BUG FIX: this previously used `FLAGS.eval_data_pattern is ""`,
        # an identity comparison with a string literal whose result is
        # implementation-defined; test for emptiness instead.
        if not FLAGS.eval_data_pattern:
            raise IOError("'eval_data_pattern' was not specified. " +
                          "Nothing to evaluate.")

        build_graph(
            reader=reader,
            model=model,
            eval_data_pattern=FLAGS.eval_data_pattern,
            label_loss_fn=label_loss_fn,
            num_readers=FLAGS.num_readers,
            batch_size=FLAGS.batch_size)
        logging.info("built evaluation graph")
        video_id_batch = tf.get_collection("video_id_batch")[0]
        prediction_batch = tf.get_collection("predictions")[0]
        label_batch = tf.get_collection("labels")[0]
        loss = tf.get_collection("loss")[0]
        summary_op = tf.get_collection("summary_op")[0]

        saver = tf.train.Saver(tf.global_variables())
        summary_writer = tf.summary.FileWriter(
            FLAGS.train_dir, graph=tf.get_default_graph())

        evl_metrics = eval_util.EvaluationMetrics(reader.num_classes, FLAGS.top_k)

        last_global_step_val = -1
        while True:
            last_global_step_val = evaluation_loop(video_id_batch, prediction_batch,
                                                   label_batch, loss, summary_op,
                                                   saver, summary_writer, evl_metrics,
                                                   last_global_step_val)
            if FLAGS.run_once:
                break


def main(unused_argv):
    logging.set_verbosity(tf.logging.INFO)
    print("tensorflow version: %s" % tf.__version__)
    evaluate()


if __name__ == "__main__":
    app.run()


# ---------------------------------------------------------------------------
# eval_util.py
# ---------------------------------------------------------------------------
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Provides functions to help with evaluating models."""
import mean_average_precision_calculator as map_calculator
import average_precision_calculator as ap_calculator
import numpy


def flatten(l):
    """ Merges a list of lists into a single list. """
    return [item for sublist in l for item in sublist]


def calculate_hit_at_one(predictions, actuals):
    """Performs a local (numpy) calculation of the hit at one.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.

    Returns:
      float: The average hit at one across the entire batch.
    """
    top_prediction = numpy.argmax(predictions, 1)
    # For each row, read off the actual label at the top-predicted class.
    hits = actuals[numpy.arange(actuals.shape[0]), top_prediction]
    return numpy.average(hits)


def calculate_precision_at_equal_recall_rate(predictions, actuals):
    """Performs a local (numpy) calculation of the PERR.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.

    Returns:
      float: The average precision at equal recall rate across the entire batch.
    """
    aggregated_precision = 0.0
    num_videos = actuals.shape[0]
    for row in numpy.arange(num_videos):
        num_labels = int(numpy.sum(actuals[row]))
        # NOTE(review): when a video has zero labels, `[-0:]` selects every
        # class, so such rows contribute 0 precision over all classes --
        # confirm zero-label videos cannot occur upstream.
        top_indices = numpy.argpartition(predictions[row],
                                         -num_labels)[-num_labels:]
        item_precision = 0.0
        for label_index in top_indices:
            if predictions[row][label_index] > 0:
                item_precision += actuals[row][label_index]
        item_precision /= top_indices.size
        aggregated_precision += item_precision
    aggregated_precision /= num_videos
    return aggregated_precision


def calculate_gap(predictions, actuals, top_k=20):
    """Performs a local (numpy) calculation of the global average precision.

    Only the top_k predictions are taken for each of the videos.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.
      top_k: How many predictions to use per video.

    Returns:
      float: The global average precision.
    """
    gap_calculator = ap_calculator.AveragePrecisionCalculator()
    sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
    gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
    return gap_calculator.peek_ap_at_n()
111 | """ 112 | if k <= 0: 113 | raise ValueError("k must be a positive integer.") 114 | k = min(k, predictions.shape[1]) 115 | num_classes = predictions.shape[1] 116 | prediction_triplets= [] 117 | for video_index in range(predictions.shape[0]): 118 | prediction_triplets.extend(top_k_triplets(predictions[video_index],labels[video_index], k)) 119 | out_predictions = [[] for v in range(num_classes)] 120 | out_labels = [[] for v in range(num_classes)] 121 | for triplet in prediction_triplets: 122 | out_predictions[triplet[0]].append(triplet[1]) 123 | out_labels[triplet[0]].append(triplet[2]) 124 | out_true_positives = [numpy.sum(labels[:,i]) for i in range(num_classes)] 125 | 126 | return out_predictions, out_labels, out_true_positives 127 | 128 | 129 | def top_k_triplets(predictions, labels, k=20): 130 | """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in 131 | (prediction, class) format""" 132 | m = len(predictions) 133 | k = min(k, m) 134 | indices = numpy.argpartition(predictions, -k)[-k:] 135 | return [(index, predictions[index], labels[index]) for index in indices] 136 | 137 | 138 | class EvaluationMetrics(object): 139 | """A class to store the evaluation metrics.""" 140 | def __init__(self, num_class, top_k): 141 | """Construct an EvaluationMetrics object to store the evaluation metrics. 142 | 143 | Args: 144 | num_class: A positive integer specifying the number of classes. 145 | top_k: A positive integer specifying how many predictions are considered per video. 146 | 147 | Raises: 148 | ValueError: An error occurred when MeanAveragePrecisionCalculator cannot 149 | not be constructed. 
150 | """ 151 | self.sum_hit_at_one = 0.0 152 | self.sum_perr = 0.0 153 | self.sum_loss = 0.0 154 | self.map_calculator = map_calculator.MeanAveragePrecisionCalculator(num_class) 155 | self.global_ap_calculator = ap_calculator.AveragePrecisionCalculator() 156 | self.top_k = top_k 157 | self.num_examples = 0 158 | 159 | def accumulate(self, predictions, labels, loss): 160 | """Accumulate the metrics calculated locally for this mini-batch. 161 | 162 | Args: 163 | predictions: A numpy matrix containing the outputs of the model. 164 | Dimensions are 'batch' x 'num_classes'. 165 | labels: A numpy matrix containing the ground truth labels. 166 | Dimensions are 'batch' x 'num_classes'. 167 | loss: A numpy array containing the loss for each sample. 168 | 169 | Returns: 170 | dictionary: A dictionary storing the metrics for the mini-batch. 171 | 172 | Raises: 173 | ValueError: An error occurred when the shape of predictions and actuals 174 | does not match. 175 | """ 176 | batch_size = labels.shape[0] 177 | mean_hit_at_one = calculate_hit_at_one(predictions, labels) 178 | mean_perr = calculate_precision_at_equal_recall_rate(predictions, labels) 179 | mean_loss = numpy.mean(loss) 180 | 181 | # Take the top 20 predictions. 182 | sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, labels, self.top_k) 183 | self.map_calculator.accumulate(sparse_predictions, sparse_labels, num_positives) 184 | self.global_ap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives)) 185 | 186 | self.num_examples += batch_size 187 | self.sum_hit_at_one += mean_hit_at_one * batch_size 188 | self.sum_perr += mean_perr * batch_size 189 | self.sum_loss += mean_loss * batch_size 190 | 191 | return {"hit_at_one": mean_hit_at_one, "perr": mean_perr, "loss": mean_loss} 192 | 193 | def get(self): 194 | """Calculate the evaluation metrics for the whole epoch. 195 | 196 | Raises: 197 | ValueError: If no examples were accumulated. 
198 | 199 | Returns: 200 | dictionary: a dictionary storing the evaluation metrics for the epoch. The 201 | dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, and 202 | aps (default nan). 203 | """ 204 | if self.num_examples <= 0: 205 | raise ValueError("total_sample must be positive.") 206 | avg_hit_at_one = self.sum_hit_at_one / self.num_examples 207 | avg_perr = self.sum_perr / self.num_examples 208 | avg_loss = self.sum_loss / self.num_examples 209 | 210 | aps = self.map_calculator.peek_map_at_n() 211 | gap = self.global_ap_calculator.peek_ap_at_n() 212 | 213 | epoch_info_dict = {} 214 | return {"avg_hit_at_one": avg_hit_at_one, "avg_perr": avg_perr, 215 | "avg_loss": avg_loss, "aps": aps, "gap": gap} 216 | 217 | def clear(self): 218 | """Clear the evaluation metrics and reset the EvaluationMetrics object.""" 219 | self.sum_hit_at_one = 0.0 220 | self.sum_perr = 0.0 221 | self.sum_loss = 0.0 222 | self.map_calculator.clear() 223 | self.global_ap_calculator.clear() 224 | self.num_examples = 0 225 | -------------------------------------------------------------------------------- /export_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Utilities to export a model for batch prediction.""" 15 | # noinspection PyUnresolvedReferences 16 | import pathmagic 17 | import tensorflow as tf 18 | import tensorflow.contrib.slim as slim 19 | 20 | from tensorflow.python.saved_model import builder as saved_model_builder 21 | from tensorflow.python.saved_model import signature_constants 22 | from tensorflow.python.saved_model import signature_def_utils 23 | from tensorflow.python.saved_model import tag_constants 24 | from tensorflow.python.saved_model import utils as saved_model_utils 25 | 26 | _TOP_PREDICTIONS_IN_OUTPUT = 20 27 | 28 | 29 | class ModelExporter(object): 30 | def __init__(self, frame_features, model, reader): 31 | self.frame_features = frame_features 32 | self.model = model 33 | self.reader = reader 34 | 35 | with tf.Graph().as_default() as graph: 36 | self.inputs, self.outputs = self.build_inputs_and_outputs() 37 | self.graph = graph 38 | self.saver = tf.train.Saver(tf.trainable_variables(), sharded=True) 39 | 40 | def export_model(self, model_dir, global_step_val, last_checkpoint): 41 | """Exports the model so that it can used for batch predictions.""" 42 | with self.graph.as_default(): 43 | with tf.Session() as session: 44 | session.run(tf.global_variables_initializer()) 45 | self.saver.restore(session, last_checkpoint) 46 | 47 | signature = signature_def_utils.build_signature_def( 48 | inputs=self.inputs, 49 | outputs=self.outputs, 50 | method_name=signature_constants.PREDICT_METHOD_NAME) 51 | 52 | signature_map = {signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: 53 | signature} 54 | 55 | model_builder = saved_model_builder.SavedModelBuilder(model_dir) 56 | model_builder.add_meta_graph_and_variables(session, 57 | tags=[tag_constants.SERVING], 58 | signature_def_map=signature_map, 59 | clear_devices=True) 60 | model_builder.save() 61 | 62 | def build_inputs_and_outputs(self): 63 | if self.frame_features: 64 | serialized_examples = tf.placeholder(tf.string, shape=(None,)) 65 | 66 | 
fn = lambda x: self.build_prediction_graph(x) 67 | video_id_output, top_indices_output, top_predictions_output = ( 68 | tf.map_fn(fn, serialized_examples, 69 | dtype=(tf.string, tf.int32, tf.float32))) 70 | 71 | else: 72 | serialized_examples = tf.placeholder(tf.string, shape=(None,)) 73 | 74 | video_id_output, top_indices_output, top_predictions_output = ( 75 | self.build_prediction_graph(serialized_examples)) 76 | 77 | inputs = {"example_bytes": 78 | saved_model_utils.build_tensor_info(serialized_examples)} 79 | 80 | outputs = { 81 | "video_id": saved_model_utils.build_tensor_info(video_id_output), 82 | "class_indexes": saved_model_utils.build_tensor_info(top_indices_output), 83 | "predictions": saved_model_utils.build_tensor_info(top_predictions_output)} 84 | 85 | return inputs, outputs 86 | 87 | def build_prediction_graph(self, serialized_examples): 88 | video_id, model_input_raw, labels_batch, num_frames = ( 89 | self.reader.prepare_serialized_examples(serialized_examples)) 90 | 91 | feature_dim = len(model_input_raw.get_shape()) - 1 92 | model_input = tf.nn.l2_normalize(model_input_raw, feature_dim) 93 | 94 | with tf.variable_scope("tower"): 95 | result = self.model.create_model( 96 | model_input, 97 | num_frames=num_frames, 98 | vocab_size=self.reader.num_classes, 99 | labels=labels_batch, 100 | is_training=False) 101 | 102 | for variable in slim.get_model_variables(): 103 | tf.summary.histogram(variable.op.name, variable) 104 | 105 | predictions = result["predictions"] 106 | 107 | top_predictions, top_indices = tf.nn.top_k(predictions, 108 | _TOP_PREDICTIONS_IN_OUTPUT) 109 | return video_id, top_indices, top_predictions 110 | -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Binary for generating predictions over a set of videos.""" 16 | import os 17 | import sys 18 | 19 | # Explicitly add the file's directory to the path list. 20 | file_dir = os.path.dirname(__file__) 21 | sys.path.append(file_dir) 22 | sys.path.append(os.path.join(os.getcwd(), "modules")) 23 | # noinspection PyUnresolvedReferences 24 | import pathmagic 25 | import os 26 | import glob 27 | import json 28 | import tarfile 29 | import time 30 | import sys 31 | import numpy 32 | import tensorflow as tf 33 | from tensorflow.python.lib.io import file_io 34 | from tensorflow import app 35 | from tensorflow import flags 36 | from tensorflow import gfile 37 | from tensorflow import logging 38 | 39 | import eval_util 40 | import losses 41 | import readers 42 | import utils 43 | 44 | 45 | FLAGS = flags.FLAGS 46 | 47 | if __name__ == '__main__': 48 | # Input 49 | flags.DEFINE_string("train_dir", "", 50 | "The directory to load the model files from. We assume " 51 | "that you have already run eval.py onto this, such that " 52 | "inference_model.* files already exist.") 53 | flags.DEFINE_string( 54 | "input_data_pattern", "", 55 | "File glob defining the evaluation dataset in tensorflow.SequenceExample " 56 | "format. 
The SequenceExamples are expected to have an 'rgb' byte array " 57 | "sequence feature as well as a 'labels' int64 context feature.") 58 | flags.DEFINE_string("input_model_tgz", "", 59 | "If given, must be path to a .tgz file that was written " 60 | "by this binary using flag --output_model_tgz. In this " 61 | "case, the .tgz file will be untarred to " 62 | "--untar_model_dir and the model will be used for " 63 | "inference.") 64 | flags.DEFINE_string("untar_model_dir", "/tmp/yt8m-model", 65 | "If --input_model_tgz is given, then this directory will " 66 | "be created and the contents of the .tgz file will be " 67 | "untarred here.") 68 | 69 | # Output 70 | flags.DEFINE_string("output_file", "", 71 | "The file to save the predictions to.") 72 | flags.DEFINE_string("output_model_tgz", "", 73 | "If given, should be a filename with a .tgz extension, " 74 | "the model graph and checkpoint will be bundled in this " 75 | "gzip tar. This file can be uploaded to Kaggle for the " 76 | "top 10 participants.") 77 | flags.DEFINE_integer("top_k", 20, 78 | "How many predictions to output per video.") 79 | 80 | # Other flags. 81 | flags.DEFINE_integer( 82 | "batch_size", 1024, 83 | "How many examples to process per batch.") 84 | flags.DEFINE_integer("num_readers", 1, 85 | "How many threads to use for reading input files.") 86 | 87 | 88 | def format_lines(video_ids, predictions, top_k): 89 | batch_size = len(video_ids) 90 | for video_index in range(batch_size): 91 | top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:] 92 | line = [(class_index, predictions[video_index][class_index]) 93 | for class_index in top_indices] 94 | line = sorted(line, key=lambda p: -p[1]) 95 | yield video_ids[video_index].decode('utf-8') + "," + " ".join( 96 | "%i %g" % (label, score) for (label, score) in line) + "\n" 97 | 98 | 99 | def get_input_data_tensors(reader, data_pattern, batch_size, num_readers=1): 100 | """Creates the section of the graph which reads the input data. 
101 | 102 | Args: 103 | reader: A class which parses the input data. 104 | data_pattern: A 'glob' style path to the data files. 105 | batch_size: How many examples to process at a time. 106 | num_readers: How many I/O threads to use. 107 | 108 | Returns: 109 | A tuple containing the features tensor, labels tensor, and optionally a 110 | tensor containing the number of frames per video. The exact dimensions 111 | depend on the reader being used. 112 | 113 | Raises: 114 | IOError: If no files matching the given pattern were found. 115 | """ 116 | with tf.name_scope("input"): 117 | files = gfile.Glob(data_pattern) 118 | if not files: 119 | raise IOError("Unable to find input files. data_pattern='" + 120 | data_pattern + "'") 121 | logging.info("number of input files: " + str(len(files))) 122 | filename_queue = tf.train.string_input_producer( 123 | files, num_epochs=1, shuffle=False) 124 | examples_and_labels = [reader.prepare_reader(filename_queue) 125 | for _ in range(num_readers)] 126 | 127 | video_id_batch, video_batch, unused_labels, num_frames_batch = ( 128 | tf.train.batch_join(examples_and_labels, 129 | batch_size=batch_size, 130 | allow_smaller_final_batch=True, 131 | enqueue_many=True)) 132 | return video_id_batch, video_batch, num_frames_batch 133 | 134 | 135 | def inference(reader, train_dir, data_pattern, out_file_location, batch_size, top_k): 136 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess, gfile.Open(out_file_location, 137 | "w+") as out_file: 138 | video_id_batch, video_batch, num_frames_batch = get_input_data_tensors(reader, data_pattern, batch_size) 139 | checkpoint_file = os.path.join(FLAGS.train_dir, "inference_model") 140 | if not gfile.Exists(checkpoint_file + ".meta"): 141 | raise IOError("Cannot find %s. Did you run eval.py?" 
% checkpoint_file) 142 | meta_graph_location = checkpoint_file + ".meta" 143 | logging.info("loading meta-graph: " + meta_graph_location) 144 | 145 | if FLAGS.output_model_tgz: 146 | out_file_tgz = file_io.FileIO(FLAGS.output_model_tgz, "w") 147 | with tarfile.open(fileobj=out_file_tgz, mode="w:gz") as tar: 148 | for model_file in file_io.get_matching_files(checkpoint_file + '.*'): 149 | # tar.addfile(file_io.FileIO(model_file, "r"), arcname=os.path.basename(model_file)) 150 | tar.addfile(file_io.FileIO(model_file, "r")) 151 | # tar.add(os.path.join(FLAGS.train_dir, "model_flags.json"), 152 | # arcname="model_flags.json") 153 | tar.addfile(file_io.FileIO(os.path.join(FLAGS.train_dir, "model_flags.json"), "r")) 154 | print('Tarred model onto ' + FLAGS.output_model_tgz) 155 | with tf.device("/gpu:0"): 156 | saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True) 157 | logging.info("restoring variables from " + checkpoint_file) 158 | saver.restore(sess, checkpoint_file) 159 | input_tensor = tf.get_collection("input_batch_raw")[0] 160 | num_frames_tensor = tf.get_collection("num_frames")[0] 161 | predictions_tensor = tf.get_collection("predictions")[0] 162 | 163 | # Workaround for num_epochs issue. 
164 | def set_up_init_ops(variables): 165 | init_op_list = [] 166 | for variable in list(variables): 167 | if "train_input" in variable.name: 168 | init_op_list.append(tf.assign(variable, 1)) 169 | variables.remove(variable) 170 | init_op_list.append(tf.variables_initializer(variables)) 171 | return init_op_list 172 | 173 | # tf.get_default_graph().clear_collection("queue_runners") 174 | # tf.get_default_graph().clear_collection("local_variables") 175 | sess.run(set_up_init_ops(tf.get_collection_ref( 176 | tf.GraphKeys.LOCAL_VARIABLES))) 177 | 178 | coord = tf.train.Coordinator() 179 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 180 | num_examples_processed = 0 181 | start_time = time.time() 182 | out_file.write("VideoId,LabelConfidencePairs\n") 183 | 184 | try: 185 | while not coord.should_stop(): 186 | video_id_batch_val, video_batch_val, num_frames_batch_val = sess.run( 187 | [video_id_batch, video_batch, num_frames_batch]) 188 | predictions_val, = sess.run([predictions_tensor], feed_dict={input_tensor: video_batch_val, 189 | num_frames_tensor: num_frames_batch_val}) 190 | now = time.time() 191 | num_examples_processed += len(video_batch_val) 192 | num_classes = predictions_val.shape[1] 193 | logging.info( 194 | "num examples processed: " + str(num_examples_processed) + " elapsed seconds: " + "{0:.2f}".format( 195 | now - start_time)) 196 | for line in format_lines(video_id_batch_val, predictions_val, top_k): 197 | out_file.write(line) 198 | out_file.flush() 199 | 200 | except tf.errors.OutOfRangeError: 201 | logging.info('Done with inference. The output file was written to ' + out_file_location) 202 | finally: 203 | coord.request_stop() 204 | 205 | coord.join(threads) 206 | sess.close() 207 | 208 | 209 | def main(unused_argv): 210 | logging.set_verbosity(tf.logging.INFO) 211 | if FLAGS.input_model_tgz: 212 | if FLAGS.train_dir: 213 | raise ValueError("You cannot supply --train_dir if supplying " 214 | "--input_model_tgz") 215 | # Untar. 
216 | if not file_io.file_exists(FLAGS.untar_model_dir): 217 | os.makedirs(FLAGS.untar_model_dir) 218 | tarfile.open(FLAGS.input_model_tgz).extractall(FLAGS.untar_model_dir) 219 | FLAGS.train_dir = FLAGS.untar_model_dir 220 | 221 | flags_dict_file = os.path.join(FLAGS.train_dir, "model_flags.json") 222 | if not file_io.file_exists(flags_dict_file): 223 | raise IOError("Cannot find %s. Did you run eval.py?" % flags_dict_file) 224 | flags_dict = json.loads(file_io.FileIO(flags_dict_file, "r").read()) 225 | 226 | # convert feature_names and feature_sizes to lists of values 227 | feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes( 228 | flags_dict["feature_names"], flags_dict["feature_sizes"]) 229 | 230 | if flags_dict["frame_features"]: 231 | reader = readers.YT8MFrameFeatureReader(feature_names=feature_names, 232 | feature_sizes=feature_sizes) 233 | else: 234 | reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names, 235 | feature_sizes=feature_sizes) 236 | 237 | if FLAGS.output_file is "": 238 | raise ValueError("'output_file' was not specified. " 239 | "Unable to continue with inference.") 240 | 241 | if FLAGS.input_data_pattern is "": 242 | raise ValueError("'input_data_pattern' was not specified. " 243 | "Unable to continue with inference.") 244 | 245 | inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern, 246 | FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k) 247 | 248 | 249 | if __name__ == "__main__": 250 | app.run() 251 | -------------------------------------------------------------------------------- /losses.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Provides definitions for non-regularized training or test losses.""" 16 | # noinspection PyUnresolvedReferences 17 | import pathmagic 18 | import tensorflow as tf 19 | 20 | 21 | class BaseLoss(object): 22 | """Inherit from this class when implementing new losses.""" 23 | 24 | def calculate_loss(self, unused_predictions, unused_labels, **unused_params): 25 | """Calculates the average loss of the examples in a mini-batch. 26 | 27 | Args: 28 | unused_predictions: a 2-d tensor storing the prediction scores, in which 29 | each row represents a sample in the mini-batch and each column 30 | represents a class. 31 | unused_labels: a 2-d tensor storing the labels, which has the same shape 32 | as the unused_predictions. The labels must be in the range of 0 and 1. 33 | unused_params: loss specific parameters. 34 | 35 | Returns: 36 | A scalar loss tensor. 37 | """ 38 | raise NotImplementedError() 39 | 40 | 41 | class CrossEntropyLoss(BaseLoss): 42 | """Calculate the cross entropy loss between the predictions and labels. 
43 | """ 44 | def calculate_loss(self, predictions, labels, **unused_params): 45 | with tf.name_scope("loss_xent"): 46 | epsilon = 10e-6 47 | float_labels = tf.cast(labels, tf.float32) 48 | cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + \ 49 | (1 - float_labels) * tf.log(1 - predictions + epsilon) 50 | cross_entropy_loss = tf.negative(cross_entropy_loss) 51 | return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1)) 52 | 53 | 54 | class HingeLoss(BaseLoss): 55 | """Calculate the hinge loss between the predictions and labels. 56 | 57 | Note the subgradient is used in the backpropagation, and thus the optimization 58 | may converge slower. The predictions trained by the hinge loss are between -1 59 | and +1. 60 | """ 61 | def calculate_loss(self, predictions, labels, b=1.0, **unused_params): 62 | with tf.name_scope("loss_hinge"): 63 | float_labels = tf.cast(labels, tf.float32) 64 | all_zeros = tf.zeros(tf.shape(float_labels), dtype=tf.float32) 65 | all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32) 66 | sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones) 67 | hinge_loss = tf.maximum( 68 | all_zeros, tf.scalar_mul(b, all_ones) - sign_labels * predictions) 69 | return tf.reduce_mean(tf.reduce_sum(hinge_loss, 1)) 70 | 71 | 72 | class SoftmaxLoss(BaseLoss): 73 | """Calculate the softmax loss between the predictions and labels. 74 | 75 | The function calculates the loss in the following way: first we feed the 76 | predictions to the softmax activation function and then we calculate 77 | the minus linear dot product between the logged softmax activations and the 78 | normalized ground truth label. 79 | 80 | It is an extension to the one-hot label. It allows for more than one positive 81 | labels for each sample. 
82 | """ 83 | 84 | def calculate_loss(self, predictions, labels, **unused_params): 85 | with tf.name_scope("loss_softmax"): 86 | epsilon = 10e-8 87 | float_labels = tf.cast(labels, tf.float32) 88 | # l1 normalization (labels are no less than 0) 89 | label_rowsum = tf.maximum( 90 | tf.reduce_sum(float_labels, 1, keep_dims=True), 91 | epsilon) 92 | norm_float_labels = tf.div(float_labels, label_rowsum) 93 | softmax_outputs = tf.nn.softmax(predictions) 94 | softmax_loss = tf.negative(tf.reduce_sum( 95 | tf.multiply(norm_float_labels, tf.log(softmax_outputs)), 1)) 96 | return tf.reduce_mean(softmax_loss) 97 | -------------------------------------------------------------------------------- /mean_average_precision_calculator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Calculate the mean average precision. 16 | 17 | It provides an interface for calculating mean average precision 18 | for an entire list or the top-n ranked items. 19 | 20 | Example usages: 21 | We first call the function accumulate many times to process parts of the ranked 22 | list. After processing all the parts, we call peek_map_at_n 23 | to calculate the mean average precision. 
24 | 25 | ``` 26 | import random 27 | 28 | p = np.array([[random.random() for _ in xrange(50)] for _ in xrange(1000)]) 29 | a = np.array([[random.choice([0, 1]) for _ in xrange(50)] 30 | for _ in xrange(1000)]) 31 | 32 | # mean average precision for 50 classes. 33 | calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator( 34 | num_class=50) 35 | calculator.accumulate(p, a) 36 | aps = calculator.peek_map_at_n() 37 | ``` 38 | """ 39 | # noinspection PyUnresolvedReferences 40 | import pathmagic 41 | import numpy 42 | import average_precision_calculator 43 | 44 | 45 | class MeanAveragePrecisionCalculator(object): 46 | """This class is to calculate mean average precision. 47 | """ 48 | 49 | def __init__(self, num_class): 50 | """Construct a calculator to calculate the (macro) average precision. 51 | 52 | Args: 53 | num_class: A positive Integer specifying the number of classes. 54 | top_n_array: A list of positive integers specifying the top n for each 55 | class. The top n in each class will be used to calculate its average 56 | precision at n. 57 | The size of the array must be num_class. 58 | 59 | Raises: 60 | ValueError: An error occurred when num_class is not a positive integer; 61 | or the top_n_array is not a list of positive integers. 62 | """ 63 | if not isinstance(num_class, int) or num_class <= 1: 64 | raise ValueError("num_class must be a positive integer.") 65 | 66 | self._ap_calculators = [] # member of AveragePrecisionCalculator 67 | self._num_class = num_class # total number of classes 68 | for i in range(num_class): 69 | self._ap_calculators.append( 70 | average_precision_calculator.AveragePrecisionCalculator()) 71 | 72 | def accumulate(self, predictions, actuals, num_positives=None): 73 | """Accumulate the predictions and their ground truth labels. 74 | 75 | Args: 76 | predictions: A list of lists storing the prediction scores. The outer 77 | dimension corresponds to classes. 
78 | actuals: A list of lists storing the ground truth labels. The dimensions 79 | should correspond to the predictions input. Any value 80 | larger than 0 will be treated as positives, otherwise as negatives. 81 | num_positives: If provided, it is a list of numbers representing the 82 | number of true positives for each class. If not provided, the number of 83 | true positives will be inferred from the 'actuals' array. 84 | 85 | Raises: 86 | ValueError: An error occurred when the shape of predictions and actuals 87 | does not match. 88 | """ 89 | if not num_positives: 90 | num_positives = [None for i in predictions.shape[1]] 91 | 92 | calculators = self._ap_calculators 93 | for i in range(len(predictions)): 94 | calculators[i].accumulate(predictions[i], actuals[i], num_positives[i]) 95 | 96 | def clear(self): 97 | for calculator in self._ap_calculators: 98 | calculator.clear() 99 | 100 | def is_empty(self): 101 | return ([calculator.heap_size for calculator in self._ap_calculators] == 102 | [0 for _ in range(self._num_class)]) 103 | 104 | def peek_map_at_n(self): 105 | """Peek the non-interpolated mean average precision at n. 106 | 107 | Returns: 108 | An array of non-interpolated average precision at n (default 0) for each 109 | class. 110 | """ 111 | aps = [self._ap_calculators[i].peek_ap_at_n() 112 | for i in range(self._num_class)] 113 | return aps 114 | -------------------------------------------------------------------------------- /model_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains a collection of util functions for model construction. 16 | """ 17 | # noinspection PyUnresolvedReferences 18 | import pathmagic 19 | import numpy 20 | import tensorflow as tf 21 | from tensorflow import logging 22 | from tensorflow import flags 23 | import tensorflow.contrib.slim as slim 24 | 25 | 26 | def SampleRandomSequence(model_input, num_frames, num_samples): 27 | """ Samples a random sequence of frames of size num_samples. 28 | 29 | Args: 30 | model_input: A tensor of size batch_size x max_frames x feature_size 31 | num_frames: A tensor of size batch_size x 1 32 | num_samples: A scalar 33 | 34 | Returns: 35 | `model_input`: A tensor of size batch_size x num_samples x feature_size 36 | """ 37 | 38 | batch_size = tf.shape(model_input)[0] 39 | frame_index_offset = tf.tile( 40 | tf.expand_dims(tf.range(num_samples), 0), [batch_size, 1]) 41 | max_start_frame_index = tf.maximum(num_frames - num_samples, 0) 42 | start_frame_index = tf.cast( 43 | tf.multiply( 44 | tf.random_uniform([batch_size, 1]), 45 | tf.cast(max_start_frame_index + 1, tf.float32)), tf.int32) 46 | frame_index = tf.minimum(start_frame_index + frame_index_offset, 47 | tf.cast(num_frames - 1, tf.int32)) 48 | batch_index = tf.tile( 49 | tf.expand_dims(tf.range(batch_size), 1), [1, num_samples]) 50 | index = tf.stack([batch_index, frame_index], 2) 51 | return tf.gather_nd(model_input, index) 52 | 53 | 54 | def SampleRandomFrames(model_input, num_frames, num_samples): 55 | """ Samples a random set of frames of size num_samples. 
def FramePooling(frames, method, **unused_params):
  """Pools over the frames of a video.

  Args:
    frames: A tensor with shape [batch_size, num_frames, feature_size].
    method: "average", "max", or "none".

  Returns:
    A tensor with shape [batch_size, feature_size] for average or max
    pooling. A tensor with shape [batch_size*num_frames, feature_size]
    for none pooling.

  Raises:
    ValueError: if method is other than "average", "max", or "none".
      (The docstring previously also advertised "attention", but no such
      branch exists; the claim is removed for consistency with the code.)
  """
  if method == "average":
    return tf.reduce_mean(frames, 1)
  elif method == "max":
    return tf.reduce_max(frames, 1)
  elif method == "none":
    # BUG FIX: tf.Tensor has no shape_as_list() method; the static shape is
    # exposed as Tensor.shape (a TensorShape) with an as_list() accessor.
    feature_size = frames.shape.as_list()[2]
    # Flatten batch and frame dimensions together, keeping the feature axis.
    return tf.reshape(frames, [-1, feature_size])
  else:
    raise ValueError("Unrecognized pooling method: %s" % method)


def SampleUniformFrames(model_input, num_frames, num_samples):
  """ Uniformly samples (deterministically) a set of frames of size num_samples.

  Args:
    model_input: A tensor of size batch_size x max_frames x feature_size
    num_frames: A tensor of size batch_size x 1
    num_samples: A scalar

  Returns:
    `model_input`: A tensor of size batch_size x num_samples x feature_size
  """
  batch_size = tf.shape(model_input)[0]
  # Fractions in [0, 1): take num_samples+1 evenly spaced points and drop the
  # final 1.0 so the scaled indices below never reach num_frames.
  even_dist_samp = tf.expand_dims(tf.linspace(0.0, 1.0, num_samples + 1), axis=0)
  even_dist_samp = tf.slice(even_dist_samp, [0, 0], [1, num_samples])
  frame_index = tf.cast(
      tf.multiply(
          tf.tile(even_dist_samp, [batch_size, 1]),
          tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])), tf.int32)
  batch_index = tf.tile(
      tf.expand_dims(tf.range(batch_size), 1), [1, num_samples])
  index = tf.stack([batch_index, frame_index], 2)
  return tf.gather_nd(model_input, index)
class BaseModel(object):
  """Inherit from this class when implementing new models."""

  # The stray `pass` that previously preceded this method was dead code
  # (a docstring already forms the class body) and has been removed.
  def create_model(self, unused_model_input, **unused_params):
    """Build the model's prediction graph.

    Subclasses must override this method.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()
def orthogonal_regularizer(scale, scope=None):
    """ Return a function that computes orthogonal regularization.

    The returned function penalizes the sum of absolute off-diagonal entries
    of W_norm^T . W_norm (after l2-normalizing `weights`), encouraging the
    weight vectors to be mutually orthogonal.

    :param scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.
    :param scope: An optional scope name.
    :return: A function with signature `orthogonal_sum(weights)` that applies orthogonal regularization.
    """
    if isinstance(scale, numbers.Integral):
        raise ValueError('scale cannot be an integer: %s' % (scale,))
    if isinstance(scale, numbers.Real):
        if scale < 0.:
            raise ValueError('Setting a scale less than 0 on a regularizer: %g.' %
                             scale)
        if scale == 0.:
            logging.info('Scale of 0 disables regularizer.')
            # A disabled regularizer contributes no loss term.
            return lambda _: None

    def orthogonal_sum(weights):
        """ Applies orthogonal regularization to weights. """
        with ops.name_scope(scope, 'orthogonal_regularizer', [weights]) as name:
            tensor_scale = ops.convert_to_tensor(scale,
                                                 dtype=weights.dtype.base_dtype,
                                                 name='scale')

            # NOTE(review): axis=1 normalizes along dimension 1, but the Gram
            # matrix below is W^T.W whose diagonal holds column norms —
            # confirm the intended normalization axis for the weight layout.
            norm_weights = tf.nn.l2_normalize(weights, axis=1)
            anchor_weights_t = tf.transpose(norm_weights)
            det_reg = tf.matmul(anchor_weights_t, norm_weights)
            # Subtract the identity, then sum absolute values: only the
            # off-diagonal (non-orthogonal) mass contributes.
            identity = tf.eye(tf.shape(det_reg)[0])
            det_reg = tf.subtract(det_reg, identity)
            det_reg = tf.reduce_sum(tf.abs(det_reg))

            # Print sum value before scaling
            det_reg = tf.Print(det_reg, [det_reg], "Orthogonal sum for \"{}\" :".format(name))

            return standard_ops.multiply(tensor_scale, det_reg, name=name)

    return orthogonal_sum


def reduce_var(x, axis=None, keep_dim=False):
    """ Return variance of a tensor, alongside the specified axis.

    Reference:
      https://stackoverflow.com/questions/39354566/what-is-the-equivalent-of-np-std-in-tensorflow

    :param x: Tensor or variable
    :param axis: int
    :param keep_dim: bool
    :return: Tensor with the variance of elements of x
    """
    # Variance as E[(x - E[x])^2]; the mean keeps its dims so the
    # subtraction broadcasts along `axis`.
    m = tf.reduce_mean(x, axis=axis, keep_dims=True)
    devs_squared = tf.square(x - m)
    return tf.reduce_mean(devs_squared, axis=axis, keep_dims=keep_dim)
class BaseModule(object):
  """Inherit from this class when implementing new modules."""

  # The stray `pass` that previously preceded this method was dead code
  # (a docstring already forms the class body) and has been removed.
  def forward(self, unused_module_input, **unused_params):
    """Run the module on its input.

    Subclasses must override this method.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()
def resize_axis(tensor, axis, new_size, fill_value=0):
  """Truncates or pads a tensor to new_size on a given axis.

  Truncate or extend tensor such that tensor.shape[axis] == new_size. If the
  size increases, the padding will be performed at the end, using fill_value.

  Args:
    tensor: The tensor to be resized.
    axis: An integer representing the dimension to be sliced.
    new_size: An integer or 0d tensor representing the new value for
      tensor.shape[axis].
    fill_value: Value to use to fill any new entries in the tensor. Will be
      cast to the type of tensor.

  Returns:
    The resized tensor.
  """
  tensor = tf.convert_to_tensor(tensor)
  shape = tf.unstack(tf.shape(tensor))

  # Amount of padding needed on `axis` (0 when truncating).
  pad_shape = shape[:]
  pad_shape[axis] = tf.maximum(0, new_size - shape[axis])

  # Size to keep from the original tensor on `axis`.
  shape[axis] = tf.minimum(shape[axis], new_size)
  shape = tf.stack(shape)

  # Keep the leading slice, then append fill_value padding at the end.
  resized = tf.concat([
      tf.slice(tensor, tf.zeros_like(shape), shape),
      tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
  ], axis)

  # Update shape.
  new_shape = tensor.get_shape().as_list()  # A copy is being made.
  new_shape[axis] = new_size
  resized.set_shape(new_shape)
  return resized


class BaseReader(object):
  """Inherit from this class when implementing new readers."""

  def prepare_reader(self, unused_filename_queue):
    """Create a thread for generating prediction and label tensors."""
    raise NotImplementedError()
  def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: Maximum number of serialized Examples to read at once.

    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    tf.add_to_collection("serialized_examples", serialized_examples)
    return self.prepare_serialized_examples(serialized_examples)

  def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized Examples into feature and label tensors.

    Args:
      serialized_examples: A 1-D string tensor of serialized tf.Examples.

    Returns:
      A tuple of (video ids, concatenated float features, dense label
      indicators, all-ones padding tensor — one entry per example).
    """
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format( \
        len(self.feature_names), len(self.feature_sizes))

    feature_map = {"id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    # Sparse label ids -> dense boolean indicator over the full vocabulary.
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    # Concatenate the requested feature columns along the feature axis.
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
  def get_video_matrix(self,
                       features,
                       feature_size,
                       max_frames,
                       max_quantized_value,
                       min_quantized_value):
    """Decodes features from an input string and quantizes it.

    Args:
      features: raw feature values
      feature_size: length of each frame feature vector
      max_frames: number of frames (rows) in the output feature_matrix
      max_quantized_value: the maximum of the quantized value.
      min_quantized_value: the minimum of the quantized value.

    Returns:
      feature_matrix: matrix of all frame-features
      num_frames: number of frames in the sequence
    """
    # Each frame was stored as uint8 bytes; decode and reshape to
    # [num_frames, feature_size].
    decoded_features = tf.reshape(
        tf.cast(tf.decode_raw(features, tf.uint8), tf.float32),
        [-1, feature_size])

    num_frames = tf.minimum(tf.shape(decoded_features)[0], max_frames)
    # Map the quantized byte range back to [min, max] float values.
    feature_matrix = utils.Dequantize(decoded_features,
                                      max_quantized_value,
                                      min_quantized_value)
    # Pad (with zeros) or truncate so every video has exactly max_frames rows.
    feature_matrix = resize_axis(feature_matrix, 0, max_frames)
    return feature_matrix, num_frames
  def prepare_serialized_examples(self, serialized_example,
                                  max_quantized_value=2, min_quantized_value=-2):
    """Parse one serialized SequenceExample into batch-of-one tensors.

    Args:
      serialized_example: A scalar string tensor holding one serialized
        tf.SequenceExample.
      max_quantized_value: the maximum of the quantized value.
      min_quantized_value: the minimum of the quantized value.

    Returns:
      A tuple of (video ids, video feature matrix, labels, num frames),
      each with a leading batch dimension of 1.
    """

    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={"id": tf.FixedLenFeature(
            [], tf.string),
            "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in self.feature_names
        })

    # read ground truth labels
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
                           validate_indices=False),
        tf.bool))

    # loads (potentially) different types of features and concatenates them
    num_features = len(self.feature_names)
    assert num_features > 0, "No feature selected: feature_names is empty!"

    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(len(self.feature_names),
                                                                                len(self.feature_sizes))

    num_frames = -1  # the number of frames in the video
    feature_matrices = [None] * num_features  # an array of different features
    for feature_index in range(num_features):
      feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
          features[self.feature_names[feature_index]],
          self.feature_sizes[feature_index],
          self.max_frames,
          max_quantized_value,
          min_quantized_value)
      if num_frames == -1:
        num_frames = num_frames_in_this_feature
      else:
        # NOTE(review): the op returned by tf.assert_equal is discarded; in
        # graph mode an unused assert op may never execute — confirm whether
        # this check is intended to be enforced.
        tf.assert_equal(num_frames, num_frames_in_this_feature)

      feature_matrices[feature_index] = feature_matrix

    # cap the number of frames at self.max_frames
    num_frames = tf.minimum(num_frames, self.max_frames)

    # concatenate different features
    video_matrix = tf.concat(feature_matrices, 1)

    # convert to batch format.
    batch_video_ids = tf.expand_dims(contexts["id"], 0)
    batch_video_matrix = tf.expand_dims(video_matrix, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
class LstmLastHiddenModule(modules.BaseModule):
    """ LSTM network that outputs the last hidden state. """
    def __init__(self, lstm_size, lstm_layers, num_frames, output_dim, scope_id=None):
        """ Initialize LSTM hidden module.
        :param lstm_size: int
        :param lstm_layers: int
        :param num_frames: num_frames x 1
        :param output_dim: int
        :param scope_id: Object
        """
        self.lstm_size = lstm_size
        self.lstm_layers = lstm_layers
        self.output_dim = output_dim
        self.num_frames = num_frames
        self.scope_id = scope_id

    def forward(self, inputs, **unused_params):
        """ Forward method for LstmLastHiddenModule.
        :param inputs: batch_size x max_frames x num_features
        :return: batch_size x output_dim
        """
        stacked_lstm = tf.contrib.rnn.MultiRNNCell(
            [
                tf.contrib.rnn.BasicLSTMCell(
                    self.lstm_size, forget_bias=1.0)
                for _ in range(self.lstm_layers)

            ])

        outputs, state = tf.nn.dynamic_rnn(stacked_lstm, inputs,
                                           sequence_length=self.num_frames,
                                           dtype=tf.float32)
        # Only output the hidden state at the end.
        return state[-1].h


class LstmConcatAverageModule(modules.BaseModule):
    """ LSTM layers with stores the average of previous layers. """
    def __init__(self, lstm_size, num_layers, max_frame):
        """ Initialize LSTM average concatenation module.
        :param lstm_size: int
        :param num_layers: int
        :param max_frame: num_frames x 1
        """
        self.lstm_size = lstm_size
        self.num_layers = num_layers
        self.max_frame = max_frame

    def forward(self, inputs, **unused_params):
        """ Forward method for LstmConcatAverageModule.
        :param inputs: batch_size x max_frames x num_features
        :return: batch_size x output_dim
        """
        # state_is_tuple=False makes the LSTM state a single concatenated
        # tensor, so it can be concatenated with the summaries below.
        stacked_lstm = tf.contrib.rnn.MultiRNNCell(
            [
                tf.contrib.rnn.BasicLSTMCell(
                    self.lstm_size, forget_bias=1.0, state_is_tuple=False)
                for _ in range(self.num_layers)
            ], state_is_tuple=False)

        outputs, state = tf.nn.dynamic_rnn(stacked_lstm, inputs,
                                           sequence_length=self.max_frame,
                                           dtype=tf.float32)

        # NOTE(review): despite the "average" naming, these are l2-normalized
        # sums (reduce_sum followed by l2_normalize) over the time axis —
        # confirm whether a true mean was intended.
        context_memory = tf.nn.l2_normalize(tf.reduce_sum(outputs, axis=1), dim=1)
        average_state = tf.nn.l2_normalize(tf.reduce_sum(inputs, axis=1), dim=1)
        final_state = tf.concat([context_memory, state, average_state], 1)

        return final_state
""" 16 | import glob 17 | import json 18 | import os 19 | import time 20 | import sys 21 | import eval_util 22 | import losses 23 | import video_level_models 24 | import frame_level_models 25 | import readers 26 | import tensorflow as tf 27 | from tensorflow.python.lib.io import file_io 28 | from tensorflow import app 29 | from tensorflow import flags 30 | from tensorflow import gfile 31 | from tensorflow import logging 32 | import utils 33 | 34 | FLAGS = flags.FLAGS 35 | 36 | 37 | 38 | def find_class_by_name(name, modules): 39 | """ Searches the provided modules for the named class and returns it. """ 40 | modules = [getattr(module, name, None) for module in modules] 41 | return next(a for a in modules if a) 42 | 43 | 44 | def get_input_evaluation_tensors(reader, 45 | data_pattern, 46 | batch_size=1024, 47 | num_readers=1): 48 | """Creates the section of the graph which reads the evaluation data. 49 | 50 | Args: 51 | reader: A class which parses the training data. 52 | data_pattern: A 'glob' style path to the data files. 53 | batch_size: How many examples to process at a time. 54 | num_readers: How many I/O threads to use. 55 | 56 | Returns: 57 | A tuple containing the features tensor, labels tensor, and optionally a 58 | tensor containing the number of frames per video. The exact dimensions 59 | depend on the reader being used. 60 | 61 | Raises: 62 | IOError: If no files matching the given pattern were found. 
def build_graph(reader,
                model,
                eval_data_pattern,
                label_loss_fn,
                batch_size=1024,
                num_readers=1):
  """Creates the Tensorflow graph for evaluation.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
      from BaseModel.
    eval_data_pattern: glob path to the evaluation data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
      from BaseLoss.
    batch_size: How many examples to process at a time.
    num_readers: How many threads to use for I/O operations.
  """

  global_step = tf.Variable(0, trainable=False, name="global_step")
  video_id_batch, model_input_raw, labels_batch, num_frames = get_input_evaluation_tensors(
      # pylint: disable=g-line-too-long
      reader,
      eval_data_pattern,
      batch_size=batch_size,
      num_readers=num_readers)
  tf.summary.histogram("model_input_raw", model_input_raw)

  # Index of the last dimension, which holds the features.
  feature_dim = len(model_input_raw.get_shape()) - 1

  # Normalize input features.
  model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

  with tf.variable_scope("tower"):
    result = model.create_model(model_input,
                                num_frames=num_frames,
                                vocab_size=reader.num_classes,
                                labels=labels_batch,
                                is_training=False)
    predictions = result["predictions"]
    tf.summary.histogram("model_activations", predictions)
    # Prefer a loss supplied by the model itself; otherwise fall back to the
    # provided loss function.
    if "loss" in result.keys():
      label_loss = result["loss"]
    else:
      label_loss = label_loss_fn.calculate_loss(predictions, labels_batch)

  # Expose the evaluation tensors via named collections so the eval loop can
  # fetch them without direct references to this graph-building code.
  tf.add_to_collection("global_step", global_step)
  tf.add_to_collection("loss", label_loss)
  tf.add_to_collection("predictions", predictions)
  tf.add_to_collection("input_batch", model_input)
  tf.add_to_collection("input_batch_raw", model_input_raw)
  tf.add_to_collection("video_id_batch", video_id_batch)
  tf.add_to_collection("num_frames", num_frames)
  tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
  tf.add_to_collection("summary_op", tf.summary.merge_all())


def get_latest_checkpoint():
  """Return the checkpoint path prefix with the largest step in FLAGS.train_dir.

  Returns:
    The newest checkpoint prefix (path without its ".index" suffix), or
    None when no "model.ckpt-*.index" files exist.
  """
  index_files = file_io.get_matching_files(os.path.join(FLAGS.train_dir, 'model.ckpt-*.index'))
  tf.logging.debug("Looking at {}".format(index_files))

  # No files
  if not index_files:
    return None

  # Index file path with the maximum step size.
  latest_index_file = sorted(
      [(int(os.path.basename(f).split("-")[-1].split(".")[0]), f)
       for f in index_files])[-1][1]

  # Chop off .index suffix and return (".index" is 6 characters long).
  return latest_index_file[:-6]
Did you run train.py on the same " 164 | "--train_dir?") % model_flags_path) 165 | flags_dict = json.loads(file_io.FileIO(model_flags_path, mode="r").read()) 166 | 167 | with tf.Graph().as_default(): 168 | # convert feature_names and feature_sizes to lists of values 169 | feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes( 170 | flags_dict["feature_names"], flags_dict["feature_sizes"]) 171 | 172 | if flags_dict["frame_features"]: 173 | reader = readers.YT8MFrameFeatureReader(feature_names=feature_names, 174 | feature_sizes=feature_sizes) 175 | else: 176 | reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names, 177 | feature_sizes=feature_sizes) 178 | 179 | model = find_class_by_name(flags_dict["model"], 180 | [frame_level_models, video_level_models])() 181 | label_loss_fn = find_class_by_name(flags_dict["label_loss"], [losses])() 182 | 183 | if FLAGS.eval_data_pattern is "": 184 | raise IOError("'eval_data_pattern' was not specified. " + 185 | "Nothing to evaluate.") 186 | 187 | build_graph( 188 | reader=reader, 189 | model=model, 190 | eval_data_pattern=FLAGS.eval_data_pattern, 191 | label_loss_fn=label_loss_fn, 192 | num_readers=FLAGS.num_readers, 193 | batch_size=FLAGS.batch_size) 194 | logging.info("built evaluation graph") 195 | 196 | summary_writer = tf.summary.FileWriter( 197 | FLAGS.train_dir, graph=tf.get_default_graph()) 198 | 199 | evl_metrics = eval_util.EvaluationMetrics(reader.num_classes, FLAGS.top_k) 200 | -------------------------------------------------------------------------------- /scripts/generate_gcloud_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Deep Topology All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate command line arguments for Google Cloud MLE evaluation."""

import os

####################################################################
# Configuration ####################################################
####################################################################
# yaml settings. cloudml-4gpu.yaml, cloudml-gpu.yaml, cloudml-gpu-distributed.yaml
CLOUD_GPU = "cloudml-gpu.yaml"
# Name and version of the model
MODEL_NAME = "WillowModel"
MODEL_VERSION = ""
# Does it require frame-level models?
FRAME_LEVEL = True
# What features? e.g. RGB, audio
FEATURES = "rgb,audio"


def main(frame_level=FRAME_LEVEL):
    """Build the gcloud ml-engine evaluation command.

    Args:
        frame_level: If True, evaluate on frame-level validation records;
            otherwise on video-level records. Defaults to the module-level
            FRAME_LEVEL constant, preserving the original behavior.

    Returns:
        The full shell command string (job-name assignment + gcloud call).
    """
    # Start by defining a job name.
    command = "JOB_NAME=yt8m_eval_$(date +%Y%m%d_%H%M%S); "
    command += "gcloud --verbosity=debug ml-engine jobs submit training $JOB_NAME "
    command += "--package-path=youtube-8m --module-name=youtube-8m.eval "
    command += "--staging-bucket=$BUCKET_NAME --region=us-east1 "
    command += "--config=youtube-8m/cloudml_config/{} ".format(CLOUD_GPU)
    if frame_level:
        command += "-- --eval_data_pattern='gs://youtube8m-ml-us-east1/2/frame/validate/validate*.tfrecord' "
        command += "--frame_features=True "
    else:
        # BUGFIX: the closing single quote around the data pattern was missing,
        # which produced an unterminated shell quote.
        command += "-- --eval_data_pattern='gs://youtube8m-ml-us-east1/2/video/validate/validate*.tfrecord' "
        command += "--frame_features=False "
    command += "--train_dir=$BUCKET_NAME/{} ".format(MODEL_NAME + str(MODEL_VERSION))
    command += "--run_once=True"
    return command


if __name__ == "__main__":
    # BUGFIX: the original split on "\\" (Windows-only) and printed the
    # resulting *list*. Climb two directory levels portably instead.
    current_directory = os.path.dirname(os.path.dirname(os.getcwd()))
    print("Run the following command here: {}".format(current_directory))
    print(main())

# ======================================================================
# scripts/generate_gcloud_inference.py (lines 1-13; file continues below)
# ======================================================================
# Copyright 2018 Deep Topology All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate command line arguments for Google Cloud MLE inference."""

import os

####################################################################
# Configuration ####################################################
####################################################################
# yaml settings. cloudml-4gpu.yaml, cloudml-gpu.yaml, cloudml-gpu-distributed.yaml
CLOUD_GPU = "cloudml-gpu-inference.yaml"
# Name and version of the model
MODEL_NAME = "WillowModel"
MODEL_VERSION = ""
# Does it require frame-level models?
FRAME_LEVEL = True
# What features? e.g. RGB, audio
FEATURES = "rgb,audio"
# Some additional flags to execute.
# NOTE(review): EXTRA is defined but never appended to the command below --
# confirm whether "--batch_size 128" was meant to be passed to inference.
EXTRA = "--batch_size 128"


def main(frame_level=FRAME_LEVEL):
    """Build the gcloud ml-engine batch-inference command.

    Args:
        frame_level: If True, run inference on frame-level test records;
            otherwise on video-level records. Defaults to the module-level
            FRAME_LEVEL constant, preserving the original behavior.

    Returns:
        The full shell command string (job-name assignment + gcloud call).
    """
    # Start by defining a job name.
    command = "JOB_NAME=yt8m_inference_$(date +%Y%m%d_%H%M%S); "
    command += "gcloud --verbosity=debug ml-engine jobs submit training $JOB_NAME "
    command += "--package-path=youtube-8m --module-name=youtube-8m.inference "
    command += "--staging-bucket=$BUCKET_NAME --region=us-east1 "
    command += "--config=youtube-8m/cloudml_config/{} ".format(CLOUD_GPU)
    if frame_level:
        command += "-- --input_data_pattern='gs://youtube8m-ml-us-east1/2/frame/test/test*.tfrecord' "
        command += "--frame_features=True "
    else:
        # BUGFIX: the closing single quote around the data pattern was missing,
        # which produced an unterminated shell quote.
        command += "-- --input_data_pattern='gs://youtube8m-ml-us-east1/2/video/test/test*.tfrecord' "
        command += "--frame_features=False "
    command += "--train_dir=$BUCKET_NAME/{} ".format(MODEL_NAME + str(MODEL_VERSION))
    command += "--output_file=$BUCKET_NAME/{}/predictions.csv".format(MODEL_NAME + str(MODEL_VERSION))
    return command


if __name__ == "__main__":
    # BUGFIX: the original split on "\\" (Windows-only) and printed the
    # resulting *list*. Climb two directory levels portably instead.
    current_directory = os.path.dirname(os.path.dirname(os.getcwd()))
    print("Run the following command here: {}".format(current_directory))
    print(main())
# Copyright 2018 Deep Topology All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate command line arguments for Google Cloud MLE training."""

import os

####################################################################
# Configuration ####################################################
####################################################################
# yaml settings. cloudml-4gpu.yaml, cloudml-gpu.yaml, cloudml-gpu-distributed.yaml
CLOUD_GPU = "cloudml-gpu.yaml"
# Name and version of the model
MODEL_NAME = "WillowModel"
MODEL_VERSION = 1
# Does it require frame-level models?
FRAME_LEVEL = True
# What features? e.g. RGB, audio
FEATURES = "rgb,audio"
# Batch size.
BATCH_SIZE = 128
# Base LR.
BASE_LEARNING_RATE = 0.0002
# Initialize a new model?
START_NEW_MODEL = True


def main(frame_level=FRAME_LEVEL, start_new_model=START_NEW_MODEL):
    """Build the gcloud ml-engine training command.

    Args:
        frame_level: If True, train on frame-level records; otherwise on
            video-level records. Defaults to the FRAME_LEVEL constant.
        start_new_model: If True, append --start_new_model. Defaults to the
            START_NEW_MODEL constant, preserving the original behavior.

    Returns:
        The full shell command string (job-name assignment + gcloud call).
    """
    # Start by defining a job name.
    command = "JOB_NAME=yt8m_train_$(date +%Y%m%d_%H%M%S); "
    command += "gcloud --verbosity=debug ml-engine jobs submit training $JOB_NAME "
    command += "--package-path=youtube-8m --module-name=youtube-8m.train "
    command += "--staging-bucket=$BUCKET_NAME --region=us-east1 "
    command += "--config=youtube-8m/cloudml_config/{} ".format(CLOUD_GPU)
    if frame_level:
        command += "-- --train_data_pattern='gs://youtube8m-ml-us-east1/2/frame/train/train*.tfrecord' "
        command += "--frame_features=True "
    else:
        command += "-- --train_data_pattern='gs://youtube8m-ml-us-east1/2/video/train/train*.tfrecord' "
        command += "--frame_features=False "
    command += "--model={} ".format(MODEL_NAME)
    command += "--feature_names='{}' ".format(FEATURES)
    command += "--feature_sizes='1024,128' "
    command += "--batch_size={} ".format(str(BATCH_SIZE))
    command += "--train_dir=$BUCKET_NAME/{} ".format(MODEL_NAME + str(MODEL_VERSION))
    command += "--base_learning_rate={} ".format(str(BASE_LEARNING_RATE))
    if start_new_model:
        command += "--start_new_model"
    return command


if __name__ == "__main__":
    # BUGFIX: the original split on "\\" (Windows-only) and printed the
    # resulting *list*. Climb two directory levels portably instead.
    current_directory = os.path.dirname(os.path.dirname(os.getcwd()))
    print("Run the following command here: {}".format(current_directory))
    print(main())

# ======================================================================
# scripts/generate_gcloud_train_valid.py (lines 1-4; file continues below)
# ======================================================================
# Copyright 2018 Deep Topology All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Generate command line arguments for Google Cloud MLE training.
Run command: BUCKET_NAME=gs://dtp1_yt8m_train_bucket
"""


import os

####################################################################
# Configuration ####################################################
####################################################################
# yaml settings. cloudml-4gpu.yaml, cloudml-gpu.yaml, cloudml-gpu-distributed.yaml
CLOUD_GPU = "cloudml-gpu.yaml"
# Name and version of the model
MODEL_NAME = "TransformerEncoder"
MODEL_VERSION = ""
# Does it require frame-level models?
FRAME_LEVEL = True
# What features? e.g. RGB, audio
FEATURES = "rgb,audio"
# Batch size.
BATCH_SIZE = 64
# Base LR.
BASE_LEARNING_RATE = 0.00005
# Initialize a new model?
START_NEW_MODEL = True
EXTRA = "-learning_rate_decay=0.7"
# EXTRA = "-tembed_v3_video_anchor_size=64 " \
#         "-tembed_v3_audio_anchor_size=8 " \
#         "-tembed_v3_distrib_concat_hidden_size=4096 " \
#         "-tembed_v3_temporal_concat_hidden_size=4096 " \
#         "-tembed_v3_full_concat_hidden_size=8192"


def main():
    """Build matching cloud and local gcloud training commands.

    Returns:
        A (command, local_command) tuple: the ml-engine job submission
        command and the `gcloud ml-engine local train` equivalent.
    """
    # Cloud job: define a job name, then submit; local run uses local train.
    local_command = "gcloud ml-engine local train "
    command = "JOB_NAME=yt8m_train_$(date +%Y%m%d_%H%M%S); "
    command += "gcloud --verbosity=debug ml-engine jobs submit training $JOB_NAME "
    command += "--package-path=youtube-8m --module-name=youtube-8m.train "
    local_command += "--package-path=youtube-8m --module-name=youtube-8m.train "
    command += "--staging-bucket=$BUCKET_NAME --region=us-east1 "
    command += "--config=youtube-8m/cloudml_config/{} ".format(CLOUD_GPU)

    # Module flags shared verbatim by the cloud and local invocations.
    if FRAME_LEVEL:
        shared = [
            "-- --train_data_pattern='gs://youtube8m-ml-us-east1/2/frame/train/*.tfrecord' ",
            "--frame_features=True ",
        ]
    else:
        shared = [
            "-- --train_data_pattern='gs://youtube8m-ml-us-east1/2/video/train/*.tfrecord' ",
            "--frame_features=False ",
        ]
    shared.append("--base_learning_rate={} ".format(str(BASE_LEARNING_RATE)))
    shared.append("--model={} ".format(MODEL_NAME))
    shared.append("--feature_names='{}' ".format(FEATURES))
    shared.append("--feature_sizes='1024,128' ")
    shared.append("--batch_size={} ".format(str(BATCH_SIZE)))
    shared_flags = "".join(shared)
    command += shared_flags
    local_command += shared_flags

    # The two runs write to different train_dirs.
    command += "--train_dir=$BUCKET_NAME/{} ".format(MODEL_NAME + str(MODEL_VERSION))
    local_command += "--train_dir=/tmp/yt8m_train "
    # BUGFIX: --base_learning_rate used to be appended a *second* time to both
    # commands here; the duplicate has been removed.
    if START_NEW_MODEL:
        command += "--start_new_model "
        local_command += "--start_new_model "
    # BUGFIX: a separating space was missing, fusing this flag with EXTRA
    # into "--runtime-version=1.8-learning_rate_decay=0.7".
    local_command += "--runtime-version=1.8 "
    command += EXTRA
    local_command += EXTRA
    return command, local_command


if __name__ == "__main__":
    current_directory = os.getcwd()
    current_directory = "/".join(current_directory.split("/")[:-2])
    print("Run the following command here: {}".format(current_directory))
    c, lc = main()
    print("Local: \n{}".format(lc))
    print("Cloud: \n{}".format(c))

# ======================================================================
# train.py (lines 1-18; file continues below)
# ======================================================================
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Binary for training Tensorflow models on the YouTube-8M dataset."""
import os
import sys

# Explicitly add the file's directory to the path list.
file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
sys.path.append(os.path.join(os.getcwd(), "modules"))

# NOTE: these imports intentionally follow the sys.path manipulation above so
# that sibling modules (eval_util, losses, readers, ...) resolve.
import json
import time

import eval_util
import export_model
import losses
import frame_level_models
import video_level_models
import readers
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow import app
from tensorflow import flags
from tensorflow import gfile
from tensorflow import logging
from tensorflow.python.client import device_lib
import utils
# CLEANUP: a duplicate `import os` was removed -- os is already imported at
# the top of this file.

FLAGS = flags.FLAGS

if __name__ == "__main__":
  # Dataset flags.
  flags.DEFINE_string("train_dir", "/tmp/yt8m_model/",
                      "The directory to save the model files in.")
  flags.DEFINE_string(
      "train_data_pattern", "gs://youtube8m-ml-us-east1/2/frame/train/train*.tfrecord,"
      "gs://youtube8m-ml-us-east1/2/frame/validate/validate*.tfrecord",
      "File glob for the training dataset. If the files refer to Frame Level "
      "features (i.e. tensorflow.SequenceExample), then set --reader_type "
      "format. The (Sequence)Examples are expected to have 'rgb' byte array "
      "sequence feature as well as a 'labels' int64 context feature.")
  flags.DEFINE_string("feature_names", "mean_rgb", "Name of the feature "
                      "to use for training.")
  flags.DEFINE_string("feature_sizes", "1024", "Length of the feature vectors.")

  # Model flags.
  flags.DEFINE_bool(
      "frame_features", False,
      "If set, then --train_data_pattern must be frame-level features. "
      "Otherwise, --train_data_pattern must be aggregated video-level "
      "features. The model must also be set appropriately (i.e. to read 3D "
      "batches VS 4D batches.")
  flags.DEFINE_string(
      "model", "LogisticModel",
      "Which architecture to use for the model. Models are defined "
      "in models.py.")
  flags.DEFINE_bool(
      "start_new_model", False,
      "If set, this will not resume from a checkpoint and will instead create a"
      " new model instance.")

  # Training flags.
  flags.DEFINE_integer("num_gpu", 1,
                       "The maximum number of GPU devices to use for training. "
                       "Flag only applies if GPUs are installed")
  flags.DEFINE_integer("batch_size", 1024,
                       "How many examples to process per batch for training.")
  flags.DEFINE_string("label_loss", "CrossEntropyLoss",
                      "Which loss function to use for training the model.")
  flags.DEFINE_float(
      "regularization_penalty", 1.0,
      "How much weight to give to the regularization loss (the label loss has "
      "a weight of 1).")
  flags.DEFINE_float("base_learning_rate", 0.01,
                     "Which learning rate to start with.")
  flags.DEFINE_float("learning_rate_decay", 0.95,
                     "Learning rate decay factor to be applied every "
                     "learning_rate_decay_examples.")
  flags.DEFINE_float("learning_rate_decay_examples", 4000000,
                     "Multiply current learning rate by learning_rate_decay "
                     "every learning_rate_decay_examples.")
  flags.DEFINE_integer("num_epochs", 5,
                       "How many passes to make over the dataset before "
                       "halting training.")
  flags.DEFINE_integer("max_steps", None,
                       "The maximum number of iterations of the training loop.")
  flags.DEFINE_integer("export_model_steps", 1000,
                       "The period, in number of steps, with which the model "
                       "is exported for batch prediction.")

  # Other flags.
104 | flags.DEFINE_integer("num_readers", 8, 105 | "How many threads to use for reading input files.") 106 | flags.DEFINE_string("optimizer", "AdamOptimizer", 107 | "What optimizer class to use.") 108 | flags.DEFINE_float("clip_gradient_norm", 1.0, "Norm to clip gradients to.") 109 | flags.DEFINE_bool( 110 | "log_device_placement", False, 111 | "Whether to write the device on which every op will run into the " 112 | "logs on startup.") 113 | 114 | 115 | def validate_class_name(flag_value, category, modules, expected_superclass): 116 | """Checks that the given string matches a class of the expected type. 117 | Args: 118 | flag_value: A string naming the class to instantiate. 119 | category: A string used further describe the class in error messages 120 | (e.g. 'model', 'reader', 'loss'). 121 | modules: A list of modules to search for the given class. 122 | expected_superclass: A class that the given class should inherit from. 123 | Raises: 124 | FlagsError: If the given class could not be found or if the first class 125 | found with that name doesn't inherit from the expected superclass. 126 | Returns: 127 | True if a class was found that matches the given constraints. 128 | """ 129 | candidates = [getattr(module, flag_value, None) for module in modules] 130 | for candidate in candidates: 131 | if not candidate: 132 | continue 133 | if not issubclass(candidate, expected_superclass): 134 | raise flags.FlagsError("%s '%s' doesn't inherit from %s." % 135 | (category, flag_value, 136 | expected_superclass.__name__)) 137 | return True 138 | raise flags.FlagsError("Unable to find %s '%s'." % (category, flag_value)) 139 | 140 | 141 | def get_input_data_tensors(reader, 142 | data_pattern, 143 | batch_size=1000, 144 | num_epochs=None, 145 | num_readers=1): 146 | """Creates the section of the graph which reads the training data. 147 | Args: 148 | reader: A class which parses the training data. 149 | data_pattern: A 'glob' style path to the data files. 
150 | batch_size: How many examples to process at a time. 151 | num_epochs: How many passes to make over the training data. Set to 'None' 152 | to run indefinitely. 153 | num_readers: How many I/O threads to use. 154 | Returns: 155 | A tuple containing the features tensor, labels tensor, and optionally a 156 | tensor containing the number of frames per video. The exact dimensions 157 | depend on the reader being used. 158 | Raises: 159 | IOError: If no files matching the given pattern were found. 160 | """ 161 | logging.info("Using batch size of " + str(batch_size) + " for training.") 162 | with tf.name_scope("train_input"): 163 | file_dirs = data_pattern.split(",") 164 | files = list() 165 | for f in file_dirs: 166 | cur_file = gfile.Glob(f) 167 | files.extend(cur_file) 168 | if not files: 169 | raise IOError("Unable to find training files. data_pattern='" + 170 | data_pattern + "'.") 171 | logging.info("Number of training files: %s.", str(len(files))) 172 | filename_queue = tf.train.string_input_producer( 173 | files, num_epochs=num_epochs, shuffle=True) 174 | training_data = [ 175 | reader.prepare_reader(filename_queue) for _ in range(num_readers) 176 | ] 177 | 178 | return tf.train.shuffle_batch_join( 179 | training_data, 180 | batch_size=batch_size, 181 | capacity=batch_size * 5, 182 | min_after_dequeue=batch_size, 183 | allow_smaller_final_batch=True, 184 | enqueue_many=True) 185 | 186 | 187 | def find_class_by_name(name, modules): 188 | """Searches the provided modules for the named class and returns it.""" 189 | modules = [getattr(module, name, None) for module in modules] 190 | return next(a for a in modules if a) 191 | 192 | 193 | def build_graph(reader, 194 | model, 195 | train_data_pattern, 196 | label_loss_fn=losses.CrossEntropyLoss(), 197 | batch_size=1000, 198 | base_learning_rate=0.01, 199 | learning_rate_decay_examples=1000000, 200 | learning_rate_decay=0.95, 201 | optimizer_class=tf.train.AdamOptimizer, 202 | clip_gradient_norm=1.0, 203 | 
regularization_penalty=1, 204 | num_readers=1, 205 | num_epochs=None): 206 | """Creates the Tensorflow graph. 207 | This will only be called once in the life of 208 | a training model, because after the graph is created the model will be 209 | restored from a meta graph file rather than being recreated. 210 | Args: 211 | reader: The data file reader. It should inherit from BaseReader. 212 | model: The core model (e.g. logistic or neural net). It should inherit 213 | from BaseModel. 214 | train_data_pattern: glob path to the training data files. 215 | label_loss_fn: What kind of loss to apply to the model. It should inherit 216 | from BaseLoss. 217 | batch_size: How many examples to process at a time. 218 | base_learning_rate: What learning rate to initialize the optimizer with. 219 | optimizer_class: Which optimization algorithm to use. 220 | clip_gradient_norm: Magnitude of the gradient to clip to. 221 | regularization_penalty: How much weight to give the regularization loss 222 | compared to the label loss. 223 | num_readers: How many threads to use for I/O operations. 224 | num_epochs: How many passes to make over the data. 'None' means an 225 | unlimited number of passes. 226 | """ 227 | 228 | global_step = tf.Variable(0, trainable=False, name="global_step") 229 | 230 | local_device_protos = device_lib.list_local_devices() 231 | gpus = [x.name for x in local_device_protos if x.device_type == 'GPU'] 232 | gpus = gpus[:FLAGS.num_gpu] 233 | num_gpus = len(gpus) 234 | 235 | if num_gpus > 0: 236 | logging.info("Using the following GPUs to train: " + str(gpus)) 237 | num_towers = num_gpus 238 | device_string = '/gpu:%d' 239 | else: 240 | logging.info("No GPUs found. 
Training on CPU.") 241 | num_towers = 1 242 | device_string = '/cpu:%d' 243 | 244 | learning_rate = tf.train.exponential_decay( 245 | base_learning_rate, 246 | global_step * batch_size * num_towers, 247 | learning_rate_decay_examples, 248 | learning_rate_decay, 249 | staircase=True) 250 | tf.summary.scalar('learning_rate', learning_rate) 251 | 252 | optimizer = optimizer_class(learning_rate) 253 | unused_video_id, model_input_raw, labels_batch, num_frames = ( 254 | get_input_data_tensors( 255 | reader, 256 | train_data_pattern, 257 | batch_size=batch_size * num_towers, 258 | num_readers=num_readers, 259 | num_epochs=num_epochs)) 260 | tf.summary.histogram("model/input_raw", model_input_raw) 261 | 262 | feature_dim = len(model_input_raw.get_shape()) - 1 263 | 264 | model_input = tf.nn.l2_normalize(model_input_raw, feature_dim) 265 | 266 | tower_inputs = tf.split(model_input, num_towers) 267 | tower_labels = tf.split(labels_batch, num_towers) 268 | tower_num_frames = tf.split(num_frames, num_towers) 269 | tower_gradients = [] 270 | tower_predictions = [] 271 | tower_label_losses = [] 272 | tower_reg_losses = [] 273 | for i in range(num_towers): 274 | # For some reason these 'with' statements can't be combined onto the same 275 | # line. They have to be nested. 
276 | with tf.device(device_string % i): 277 | with (tf.variable_scope(("tower"), reuse=True if i > 0 else None)): 278 | with ( 279 | slim.arg_scope([slim.model_variable, slim.variable], device="/cpu:0" if num_gpus != 1 else "/gpu:0")): 280 | result = model.create_model( 281 | tower_inputs[i], 282 | num_frames=tower_num_frames[i], 283 | vocab_size=reader.num_classes, 284 | labels=tower_labels[i]) 285 | for variable in slim.get_model_variables(): 286 | tf.summary.histogram(variable.op.name, variable) 287 | 288 | predictions = result["predictions"] 289 | tower_predictions.append(predictions) 290 | 291 | if "loss" in result.keys(): 292 | label_loss = result["loss"] 293 | else: 294 | label_loss = label_loss_fn.calculate_loss(predictions, tower_labels[i]) 295 | 296 | if "regularization_loss" in result.keys(): 297 | reg_loss = result["regularization_loss"] 298 | else: 299 | reg_loss = tf.constant(0.0) 300 | 301 | reg_losses = tf.losses.get_regularization_losses() 302 | if reg_losses: 303 | reg_loss += tf.add_n(reg_losses) 304 | 305 | tower_reg_losses.append(reg_loss) 306 | 307 | # Adds update_ops (e.g., moving average updates in batch normalization) as 308 | # a dependency to the train_op. 309 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 310 | if "update_ops" in result.keys(): 311 | update_ops += result["update_ops"] 312 | if update_ops: 313 | with tf.control_dependencies(update_ops): 314 | barrier = tf.no_op(name="gradient_barrier") 315 | with tf.control_dependencies([barrier]): 316 | label_loss = tf.identity(label_loss) 317 | 318 | tower_label_losses.append(label_loss) 319 | 320 | # Incorporate the L2 weight penalties etc. 
321 | final_loss = regularization_penalty * reg_loss + label_loss 322 | gradients = optimizer.compute_gradients(final_loss, 323 | colocate_gradients_with_ops=False) 324 | tower_gradients.append(gradients) 325 | label_loss = tf.reduce_mean(tf.stack(tower_label_losses)) 326 | tf.summary.scalar("label_loss", label_loss) 327 | if regularization_penalty != 0: 328 | reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses)) 329 | tf.summary.scalar("reg_loss", reg_loss) 330 | merged_gradients = utils.combine_gradients(tower_gradients) 331 | 332 | if clip_gradient_norm > 0: 333 | with tf.name_scope('clip_grads'): 334 | merged_gradients = utils.clip_gradient_norms(merged_gradients, clip_gradient_norm) 335 | 336 | train_op = optimizer.apply_gradients(merged_gradients, global_step=global_step) 337 | 338 | tf.add_to_collection("global_step", global_step) 339 | tf.add_to_collection("loss", label_loss) 340 | tf.add_to_collection("predictions", tf.concat(tower_predictions, 0)) 341 | tf.add_to_collection("input_batch_raw", model_input_raw) 342 | tf.add_to_collection("input_batch", model_input) 343 | tf.add_to_collection("num_frames", num_frames) 344 | tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32)) 345 | tf.add_to_collection("train_op", train_op) 346 | 347 | 348 | class Trainer(object): 349 | """A Trainer to train a Tensorflow graph.""" 350 | 351 | def __init__(self, cluster, task, train_dir, model, reader, model_exporter, 352 | log_device_placement=True, max_steps=None, 353 | export_model_steps=1000): 354 | """"Creates a Trainer. 355 | Args: 356 | cluster: A tf.train.ClusterSpec if the execution is distributed. 357 | None otherwise. 358 | task: A TaskSpec describing the job type and the task index. 
359 | """ 360 | 361 | self.cluster = cluster 362 | self.task = task 363 | self.is_master = (task.type == "master" and task.index == 0) 364 | self.train_dir = train_dir 365 | self.config = tf.ConfigProto( 366 | allow_soft_placement=True, log_device_placement=log_device_placement) 367 | self.model = model 368 | self.reader = reader 369 | self.model_exporter = model_exporter 370 | self.max_steps = max_steps 371 | self.max_steps_reached = False 372 | self.export_model_steps = export_model_steps 373 | self.last_model_export_step = 0 374 | 375 | # if self.is_master and self.task.index > 0: 376 | # raise StandardError("%s: Only one replica of master expected", 377 | # task_as_string(self.task)) 378 | 379 | def run(self, start_new_model=False): 380 | """Performs training on the currently defined Tensorflow graph. 381 | Returns: 382 | A tuple of the training Hit@1 and the training PERR. 383 | """ 384 | if self.is_master and start_new_model: 385 | self.remove_training_directory(self.train_dir) 386 | 387 | if not os.path.exists(self.train_dir): 388 | os.makedirs(self.train_dir) 389 | 390 | model_flags_dict = { 391 | "model": FLAGS.model, 392 | "feature_sizes": FLAGS.feature_sizes, 393 | "feature_names": FLAGS.feature_names, 394 | "frame_features": FLAGS.frame_features, 395 | "label_loss": FLAGS.label_loss, 396 | } 397 | flags_json_path = os.path.join(FLAGS.train_dir, "model_flags.json") 398 | if os.path.exists(flags_json_path): 399 | existing_flags = json.load(open(flags_json_path)) 400 | if existing_flags != model_flags_dict: 401 | logging.error("Model flags do not match existing file %s. Please " 402 | "delete the file, change --train_dir, or pass flag " 403 | "--start_new_model", 404 | flags_json_path) 405 | logging.error("Ran model with flags: %s", str(model_flags_dict)) 406 | logging.error("Previously ran with flags: %s", str(existing_flags)) 407 | exit(1) 408 | else: 409 | # Write the file. 
410 | with open(flags_json_path, "w") as fout: 411 | fout.write(json.dumps(model_flags_dict)) 412 | 413 | target, device_fn = self.start_server_if_distributed() 414 | 415 | meta_filename = self.get_meta_filename(start_new_model, self.train_dir) 416 | 417 | with tf.Graph().as_default() as graph: 418 | if meta_filename: 419 | saver = self.recover_model(meta_filename) 420 | 421 | with tf.device(device_fn): 422 | if not meta_filename: 423 | saver = self.build_model(self.model, self.reader) 424 | 425 | global_step = tf.get_collection("global_step")[0] 426 | loss = tf.get_collection("loss")[0] 427 | predictions = tf.get_collection("predictions")[0] 428 | labels = tf.get_collection("labels")[0] 429 | train_op = tf.get_collection("train_op")[0] 430 | init_op = tf.global_variables_initializer() 431 | 432 | sv = tf.train.Supervisor( 433 | graph, 434 | logdir=self.train_dir, 435 | init_op=init_op, 436 | is_chief=self.is_master, 437 | global_step=global_step, 438 | save_model_secs=15 * 60, 439 | save_summaries_secs=120, 440 | saver=saver) 441 | 442 | logging.info("%s: Starting managed session.", task_as_string(self.task)) 443 | with sv.managed_session(target, config=self.config) as sess: 444 | try: 445 | logging.info("%s: Entering training loop.", task_as_string(self.task)) 446 | while (not sv.should_stop()) and (not self.max_steps_reached): 447 | batch_start_time = time.time() 448 | _, global_step_val, loss_val, predictions_val, labels_val = sess.run( 449 | [train_op, global_step, loss, predictions, labels]) 450 | seconds_per_batch = time.time() - batch_start_time 451 | examples_per_second = labels_val.shape[0] / seconds_per_batch 452 | 453 | if self.max_steps and self.max_steps <= global_step_val: 454 | self.max_steps_reached = True 455 | 456 | if self.is_master and global_step_val % 10 == 0 and self.train_dir: 457 | eval_start_time = time.time() 458 | hit_at_one = eval_util.calculate_hit_at_one(predictions_val, labels_val) 459 | perr = 
eval_util.calculate_precision_at_equal_recall_rate(predictions_val, 460 | labels_val) 461 | gap = eval_util.calculate_gap(predictions_val, labels_val) 462 | eval_end_time = time.time() 463 | eval_time = eval_end_time - eval_start_time 464 | 465 | logging.info("training step " + str(global_step_val) + " | Loss: " + ("%.2f" % loss_val) + 466 | " Examples/sec: " + ("%.2f" % examples_per_second) + " | Hit@1: " + 467 | ("%.2f" % hit_at_one) + " PERR: " + ("%.2f" % perr) + 468 | " GAP: " + ("%.2f" % gap)) 469 | 470 | sv.summary_writer.add_summary( 471 | utils.MakeSummary("model/Training_Hit@1", hit_at_one), 472 | global_step_val) 473 | sv.summary_writer.add_summary( 474 | utils.MakeSummary("model/Training_Perr", perr), global_step_val) 475 | sv.summary_writer.add_summary( 476 | utils.MakeSummary("model/Training_GAP", gap), global_step_val) 477 | sv.summary_writer.add_summary( 478 | utils.MakeSummary("global_step/Examples/Second", 479 | examples_per_second), global_step_val) 480 | sv.summary_writer.flush() 481 | 482 | # Exporting the model every x steps 483 | time_to_export = ((self.last_model_export_step == 0) or 484 | (global_step_val - self.last_model_export_step 485 | >= self.export_model_steps)) 486 | 487 | if self.is_master and time_to_export: 488 | self.export_model(global_step_val, sv.saver, sv.save_path, sess) 489 | self.last_model_export_step = global_step_val 490 | else: 491 | logging.info("training step " + str(global_step_val) + " | Loss: " + 492 | ("%.2f" % loss_val) + " Examples/sec: " + ("%.2f" % examples_per_second)) 493 | except tf.errors.OutOfRangeError: 494 | logging.info("%s: Done training -- epoch limit reached.", 495 | task_as_string(self.task)) 496 | 497 | logging.info("%s: Exited training loop.", task_as_string(self.task)) 498 | sv.Stop() 499 | 500 | 501 | def export_model(self, global_step_val, saver, save_path, session): 502 | # If the model has already been exported at this step, return. 
503 | if global_step_val == self.last_model_export_step: 504 | return 505 | 506 | last_checkpoint = saver.save(session, save_path, global_step_val) 507 | 508 | model_dir = "{0}/export/step_{1}".format(self.train_dir, global_step_val) 509 | logging.info("%s: Exporting the model at step %s to %s.", 510 | task_as_string(self.task), global_step_val, model_dir) 511 | 512 | self.model_exporter.export_model( 513 | model_dir=model_dir, 514 | global_step_val=global_step_val, 515 | last_checkpoint=last_checkpoint) 516 | 517 | 518 | def start_server_if_distributed(self): 519 | """Starts a server if the execution is distributed.""" 520 | 521 | if self.cluster: 522 | logging.info("%s: Starting trainer within cluster %s.", 523 | task_as_string(self.task), self.cluster.as_dict()) 524 | server = start_server(self.cluster, self.task) 525 | target = server.target 526 | device_fn = tf.train.replica_device_setter( 527 | ps_device="/job:ps", 528 | worker_device="/job:%s/task:%d" % (self.task.type, self.task.index), 529 | cluster=self.cluster) 530 | else: 531 | target = "" 532 | device_fn = "" 533 | return (target, device_fn) 534 | 535 | def remove_training_directory(self, train_dir): 536 | """Removes the training directory.""" 537 | try: 538 | logging.info( 539 | "%s: Removing existing train directory.", 540 | task_as_string(self.task)) 541 | gfile.DeleteRecursively(train_dir) 542 | except: 543 | logging.error( 544 | "%s: Failed to delete directory " + train_dir + 545 | " when starting a new model. Please delete it manually and" + 546 | " try again.", task_as_string(self.task)) 547 | 548 | def get_meta_filename(self, start_new_model, train_dir): 549 | if start_new_model: 550 | logging.info("%s: Flag 'start_new_model' is set. Building a new model.", 551 | task_as_string(self.task)) 552 | return None 553 | 554 | latest_checkpoint = tf.train.latest_checkpoint(train_dir) 555 | if not latest_checkpoint: 556 | logging.info("%s: No checkpoint file found. 
Building a new model.", 557 | task_as_string(self.task)) 558 | return None 559 | 560 | meta_filename = latest_checkpoint + ".meta" 561 | if not gfile.Exists(meta_filename): 562 | logging.info("%s: No meta graph file found. Building a new model.", 563 | task_as_string(self.task)) 564 | return None 565 | else: 566 | return meta_filename 567 | 568 | def recover_model(self, meta_filename): 569 | logging.info("%s: Restoring from meta graph file %s", 570 | task_as_string(self.task), meta_filename) 571 | return tf.train.import_meta_graph(meta_filename) 572 | 573 | def build_model(self, model, reader): 574 | """Find the model and build the graph.""" 575 | 576 | label_loss_fn = find_class_by_name(FLAGS.label_loss, [losses])() 577 | optimizer_class = find_class_by_name(FLAGS.optimizer, [tf.train]) 578 | 579 | build_graph(reader=reader, 580 | model=model, 581 | optimizer_class=optimizer_class, 582 | clip_gradient_norm=FLAGS.clip_gradient_norm, 583 | train_data_pattern=FLAGS.train_data_pattern, 584 | label_loss_fn=label_loss_fn, 585 | base_learning_rate=FLAGS.base_learning_rate, 586 | learning_rate_decay=FLAGS.learning_rate_decay, 587 | learning_rate_decay_examples=FLAGS.learning_rate_decay_examples, 588 | regularization_penalty=FLAGS.regularization_penalty, 589 | num_readers=FLAGS.num_readers, 590 | batch_size=FLAGS.batch_size, 591 | num_epochs=FLAGS.num_epochs) 592 | 593 | return tf.train.Saver(max_to_keep=0, keep_checkpoint_every_n_hours=1.0) 594 | 595 | 596 | def get_reader(): 597 | # Convert feature_names and feature_sizes to lists of values. 
def get_reader():
  """Builds the feature reader selected by the command-line flags."""
  # Convert feature_names and feature_sizes from flag strings into lists.
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)

  reader_cls = (readers.YT8MFrameFeatureReader if FLAGS.frame_features
                else readers.YT8MAggregatedFeatureReader)
  return reader_cls(feature_names=feature_names, feature_sizes=feature_sizes)


class ParameterServer(object):
  """A parameter server to serve variables in a distributed execution."""

  def __init__(self, cluster, task):
    """Creates a ParameterServer.

    Args:
      cluster: A tf.train.ClusterSpec if the execution is distributed.
        None otherwise.
      task: A TaskSpec describing the job type and the task index.
    """
    self.cluster = cluster
    self.task = task

  def run(self):
    """Starts the parameter server and blocks until it is shut down."""
    logging.info("%s: Starting parameter server within cluster %s.",
                 task_as_string(self.task), self.cluster.as_dict())
    start_server(self.cluster, self.task).join()


def start_server(cluster, task):
  """Creates and starts a grpc server for this task.

  Args:
    cluster: A tf.train.ClusterSpec if the execution is distributed.
      None otherwise.
    task: A TaskSpec describing the job type and the task index.

  Raises:
    ValueError: if the task type or index is missing.
  """
  if not task.type:
    raise ValueError("%s: The task type must be specified." %
                     task_as_string(task))
  if task.index is None:
    raise ValueError("%s: The task index must be specified." %
                     task_as_string(task))

  return tf.train.Server(
      tf.train.ClusterSpec(cluster),
      protocol="grpc",
      job_name=task.type,
      task_index=task.index)


def task_as_string(task):
  """Renders a task as the canonical "/job:<type>/task:<index>" string."""
  return "/job:%s/task:%s" % (task.type, task.index)


def main(unused_argv):
  """Entry point: dispatches to a trainer or a parameter server."""
  # The cluster and task descriptions come from the TF_CONFIG environment
  # variable, which is how Cloud ML / distributed TF hands out roles.
  env = json.loads(os.environ.get("TF_CONFIG", "{}"))

  cluster_data = env.get("cluster", None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

  # Fall back to a single-machine "master" role when no task is provided.
  task_data = env.get("task", None) or {"type": "master", "index": 0}
  task = type("TaskSpec", (object,), task_data)

  logging.set_verbosity(tf.logging.INFO)
  logging.info("%s: Tensorflow version: %s.",
               task_as_string(task), tf.__version__)

  if not cluster or task.type in ("master", "worker"):
    model = find_class_by_name(FLAGS.model,
                               [frame_level_models, video_level_models])()
    reader = get_reader()
    model_exporter = export_model.ModelExporter(
        frame_features=FLAGS.frame_features,
        model=model,
        reader=reader)
    Trainer(cluster, task, FLAGS.train_dir, model, reader, model_exporter,
            FLAGS.log_device_placement, FLAGS.max_steps,
            FLAGS.export_model_steps).run(start_new_model=FLAGS.start_new_model)
  elif task.type == "ps":
    ParameterServer(cluster, task).run()
  else:
    raise ValueError("%s: Invalid task_type: %s." %
                     (task_as_string(task), task.type))


if __name__ == "__main__":
  app.run()
def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
  """Dequantize the feature from the byte format to the float format.

  Args:
    feat_vector: the input 1-d vector.
    max_quantized_value: the maximum of the quantized value.
    min_quantized_value: the minimum of the quantized value.

  Returns:
    A float vector which has the same shape as feat_vector.
  """
  assert max_quantized_value > min_quantized_value
  quantized_range = max_quantized_value - min_quantized_value
  # 255 quantization steps span the range; the +range/512 bias recenters a
  # value to the middle of its quantization bucket.
  scalar = quantized_range / 255.0
  bias = (quantized_range / 512.0) + min_quantized_value
  return feat_vector * scalar + bias


def MakeSummary(name, value):
  """Creates a tf.Summary proto with the given name and value."""
  summary = tf.Summary()
  val = summary.value.add()
  val.tag = str(name)
  val.simple_value = float(value)
  return summary


def AddGlobalStepSummary(summary_writer,
                         global_step_val,
                         global_step_info_dict,
                         summary_scope="Eval"):
  """Add the global_step summary to the Tensorboard.

  Args:
    summary_writer: Tensorflow summary_writer.
    global_step_val: a int value of the global step.
    global_step_info_dict: a dictionary of the evaluation metrics calculated
      for a mini-batch.
    summary_scope: Train or Eval.

  Returns:
    A string of this global_step summary
  """
  this_hit_at_one = global_step_info_dict["hit_at_one"]
  this_perr = global_step_info_dict["perr"]
  this_loss = global_step_info_dict["loss"]
  # -1 marks "not measured"; the summary for it is skipped below.
  examples_per_second = global_step_info_dict.get("examples_per_second", -1)

  summary_writer.add_summary(
      MakeSummary("GlobalStep/" + summary_scope + "_Hit@1", this_hit_at_one),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("GlobalStep/" + summary_scope + "_Perr", this_perr),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("GlobalStep/" + summary_scope + "_Loss", this_loss),
      global_step_val)

  if examples_per_second != -1:
    summary_writer.add_summary(
        MakeSummary("GlobalStep/" + summary_scope + "_Example_Second",
                    examples_per_second), global_step_val)

  summary_writer.flush()
  info = ("global_step {0} | Batch Hit@1: {1:.3f} | Batch PERR: {2:.3f} "
          "| Batch Loss: {3:.3f} | Examples_per_sec: {4:.3f}").format(
              global_step_val, this_hit_at_one, this_perr, this_loss,
              examples_per_second)
  return info


def AddEpochSummary(summary_writer,
                    global_step_val,
                    epoch_info_dict,
                    summary_scope="Eval"):
  """Add the epoch summary to the Tensorboard.

  Args:
    summary_writer: TensorFlow summary_writer.
    global_step_val: a int value of the global step.
    epoch_info_dict: a dictionary of the evaluation metrics calculated for the
      whole epoch.
    summary_scope: Train or Eval.

  Returns:
    A string of this global_step summary
  """
  epoch_id = epoch_info_dict["epoch_id"]
  avg_hit_at_one = epoch_info_dict["avg_hit_at_one"]
  avg_perr = epoch_info_dict["avg_perr"]
  avg_loss = epoch_info_dict["avg_loss"]
  aps = epoch_info_dict["aps"]
  gap = epoch_info_dict["gap"]
  mean_ap = numpy.mean(aps)

  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_Avg_Hit@1", avg_hit_at_one),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_Avg_Perr", avg_perr),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_Avg_Loss", avg_loss),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_MAP", mean_ap),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_GAP", gap),
      global_step_val)
  summary_writer.flush()

  # Bug fix: the Avg_Loss placeholder was "{5:3f}" (minimum field width 3,
  # default precision), not "{5:.3f}" (3 decimal places) like every other
  # metric in this message.
  info = ("epoch/eval number {0} | Avg_Hit@1: {1:.3f} | Avg_PERR: {2:.3f} "
          "| MAP: {3:.3f} | GAP: {4:.3f} | Avg_Loss: {5:.3f}").format(
              epoch_id, avg_hit_at_one, avg_perr, mean_ap, gap, avg_loss)
  return info
def GetListOfFeatureNamesAndSizes(feature_names, feature_sizes):
  """Extract the list of feature names and the dimensionality of each feature
  from string of comma separated values.

  Args:
    feature_names: string containing comma separated list of feature names
    feature_sizes: string containing comma separated list of feature sizes

  Returns:
    List of the feature names and list of the dimensionality of each feature.
    Elements in the first/second list are strings/integers.
  """
  # Bug fix (readability/safety): the original comprehensions reused the
  # parameter names as their loop variables, shadowing the inputs.
  list_of_feature_names = [name.strip() for name in feature_names.split(',')]
  list_of_feature_sizes = [int(size) for size in feature_sizes.split(',')]
  if len(list_of_feature_names) != len(list_of_feature_sizes):
    # Kept as a logged error (not a raise) to preserve the original
    # best-effort behavior for callers.
    logging.error("length of the feature names (=%d) != length of feature "
                  "sizes (=%d)", len(list_of_feature_names),
                  len(list_of_feature_sizes))

  return list_of_feature_names, list_of_feature_sizes


def clip_gradient_norms(gradients_to_variables, max_norm):
  """Clips the gradients by the given value.

  Args:
    gradients_to_variables: A list of gradient to variable pairs (tuples).
    max_norm: the maximum norm value.

  Returns:
    A list of clipped gradient to variable pairs.
  """
  clipped_grads_and_vars = []
  for grad, var in gradients_to_variables:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        # Sparse gradient: clip only the values, keep the index structure.
        tmp = tf.clip_by_norm(grad.values, max_norm)
        grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        grad = tf.clip_by_norm(grad, max_norm)
    clipped_grads_and_vars.append((grad, var))
  return clipped_grads_and_vars


def combine_gradients(tower_grads):
  """Calculate the combined gradient for each shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    tower_grads: List of lists of (gradient, variable) tuples. The outer list
      is over individual gradients. The inner list is over the gradient
      calculation for each tower.
  Returns:
    List of pairs of (gradient, variable) where the gradient has been summed
    across all towers.
  """
  # Drop entries whose gradient is None (variables unused by a tower).
  filtered_grads = [[gv for gv in grad_list if gv[0] is not None]
                    for grad_list in tower_grads]
  final_grads = []
  # Assumes every tower yields the same variables in the same order, as the
  # original implementation did.
  for i in range(len(filtered_grads[0])):
    grads = [tower[i] for tower in filtered_grads]
    summed = tf.reduce_sum(tf.stack([g for g, _ in grads], 0), 0)
    final_grads.append((summed, filtered_grads[0][i][1]))

  return final_grads
"""Contains model definitions."""
# noinspection PyUnresolvedReferences
import pathmagic
from tensorflow import flags
import attention_modules
import tensorflow as tf
import tensorflow.contrib.slim as slim
import models
import math

FLAGS = flags.FLAGS
flags.DEFINE_integer(
    "moe_num_mixtures", 2,
    "The number of mixtures (excluding the dummy 'expert') used for MoeModel.")


###############################################################################
# Baseline (Benchmark) models #################################################
###############################################################################
flags.DEFINE_float(
    "moe_l2", 1e-8,
    "L2 penalty for MoeModel.")
flags.DEFINE_integer(
    "moe_low_rank_gating", -1,
    "Low rank gating for MoeModel.")
flags.DEFINE_bool(
    "moe_prob_gating", False,
    "Prob gating for MoeModel.")
flags.DEFINE_string(
    "moe_prob_gating_input", "prob",
    "input Prob gating for MoeModel.")


class MoeModel(models.BaseModel):
  """A softmax over a mixture of logistic models (with L2 regularization)."""

  def create_model(self,
                   model_input,
                   vocab_size,
                   is_training,
                   num_mixtures=None,
                   l2_penalty=1e-8,
                   **unused_params):
    """Creates a Mixture of (Logistic) Experts model.

    The model is a per-class softmax over a configurable number of logistic
    classifiers; one classifier in the mixture is a dummy that always
    predicts 0. Optionally, the resulting probabilities are gated.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      is_training: Is this the training phase ?
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.
    Returns:
      A dictionary with a tensor containing the probability predictions of
      the model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
    # Flag values override the defaults supplied by the caller.
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    low_rank_gating = FLAGS.moe_low_rank_gating
    l2_penalty = FLAGS.moe_l2
    gating_probabilities = FLAGS.moe_prob_gating
    gating_input = FLAGS.moe_prob_gating_input

    input_size = model_input.get_shape().as_list()[1]
    remove_diag = FLAGS.gating_remove_diag

    if low_rank_gating != -1:
      # Factorized gating: project through a low-rank bottleneck first.
      bottleneck = slim.fully_connected(
          model_input,
          low_rank_gating,
          activation_fn=None,
          biases_initializer=None,
          weights_regularizer=slim.l2_regularizer(l2_penalty),
          scope="gates1")
      gate_activations = slim.fully_connected(
          bottleneck,
          vocab_size * (num_mixtures + 1),
          activation_fn=None,
          biases_initializer=None,
          weights_regularizer=slim.l2_regularizer(l2_penalty),
          scope="gates2")
    else:
      gate_activations = slim.fully_connected(
          model_input,
          vocab_size * (num_mixtures + 1),
          activation_fn=None,
          biases_initializer=None,
          weights_regularizer=slim.l2_regularizer(l2_penalty),
          scope="gates")

    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")

    # (Batch * #Labels) x (num_mixtures + 1) softmax over mixtures, and
    # (Batch * #Labels) x num_mixtures per-expert sigmoids.
    gating_distribution = tf.nn.softmax(
        tf.reshape(gate_activations, [-1, num_mixtures + 1]))
    expert_distribution = tf.nn.sigmoid(
        tf.reshape(expert_activations, [-1, num_mixtures]))

    # The dummy expert's weight (last gating column) is simply dropped.
    weighted = gating_distribution[:, :num_mixtures] * expert_distribution
    probabilities = tf.reshape(tf.reduce_sum(weighted, 1), [-1, vocab_size])

    if gating_probabilities:
      if gating_input == 'prob':
        gating_weights = tf.get_variable(
            "gating_prob_weights",
            [vocab_size, vocab_size],
            initializer=tf.random_normal_initializer(
                stddev=1 / math.sqrt(vocab_size)))
        gates = tf.matmul(probabilities, gating_weights)
      else:
        gating_weights = tf.get_variable(
            "gating_prob_weights",
            [input_size, vocab_size],
            initializer=tf.random_normal_initializer(
                stddev=1 / math.sqrt(vocab_size)))
        gates = tf.matmul(model_input, gating_weights)

      if remove_diag:
        # removes diagonals coefficients
        gates = gates - tf.multiply(
            tf.matrix_diag_part(gating_weights), probabilities)

      gates = slim.batch_norm(
          gates,
          center=True,
          scale=True,
          is_training=is_training,
          scope="gating_prob_bn")

      probabilities = tf.multiply(probabilities, tf.sigmoid(gates))

    return {"predictions": probabilities}
class FishMoeModel(models.BaseModel):
  """A softmax over a mixture of logistic models (with L2 regularization)."""

  def create_model(self,
                   model_input,
                   vocab_size,
                   is_training,
                   num_mixtures=None,
                   l2_penalty=1e-8,
                   filter_size=2,
                   **unused_params):
    """Creates a Mixture of (Logistic) Experts model refined by a FishGate.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      is_training: Is this the training phase ?
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.
      filter_size: expansion factor handed to the FishGate module.
    Returns:
      A dictionary with a tensor containing the probability predictions of
      the model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    l2_penalty = FLAGS.moe_l2

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates")

    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    probabilities = tf.reshape(probabilities_by_class_and_batch,
                               [-1, vocab_size])
    probabilities = tf.layers.batch_normalization(probabilities,
                                                  training=is_training)

    # NOTE(review): `fish_modules` is not imported in this file — presumably a
    # project-local module; confirm the import before using this model.
    # Bug fix: k was hard-coded to 2, silently ignoring the `filter_size`
    # parameter (whose default is 2, so behavior is unchanged for existing
    # callers); FishMoeModel2 already passes filter_size here.
    fish_gate = fish_modules.FishGate(hidden_size=vocab_size,
                                      k=filter_size,
                                      dropout_rate=0.9,
                                      is_training=is_training)

    probabilities = fish_gate.forward(probabilities)
    probabilities = tf.contrib.layers.layer_norm(probabilities)

    probabilities = tf.layers.dense(probabilities, vocab_size, use_bias=True,
                                    activation=tf.nn.softmax)

    return {"predictions": probabilities}
class FishMoeModel2(models.BaseModel):
  """A softmax over a mixture of logistic models (with L2 regularization)."""

  def create_model(self,
                   model_input,
                   vocab_size,
                   is_training,
                   num_mixtures=None,
                   l2_penalty=1e-8,
                   filter_size=2,
                   **unused_params):
    """Mixture-of-experts head whose output is refined by a FishGate module.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      is_training: Is this the training phase ?
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.
      filter_size: expansion factor handed to the FishGate module.
    Returns:
      A dictionary with a tensor containing the probability predictions of
      the model in the 'predictions' key (batch_size x num_classes).
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    l2_penalty = FLAGS.moe_l2

    gate_logits = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates")
    expert_logits = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")

    # Softmax over mixtures (plus the dummy expert) and per-expert sigmoids,
    # both flattened to (Batch * #Labels) rows.
    mixture_weights = tf.nn.softmax(
        tf.reshape(gate_logits, [-1, num_mixtures + 1]))
    expert_probs = tf.nn.sigmoid(
        tf.reshape(expert_logits, [-1, num_mixtures]))

    mixed = tf.reduce_sum(
        mixture_weights[:, :num_mixtures] * expert_probs, 1)
    probabilities = tf.reshape(mixed, [-1, vocab_size])

    # NOTE(review): `fish_modules` is not imported in this file — presumably a
    # project-local module; confirm the import before using this model.
    fish_gate = fish_modules.FishGate(hidden_size=vocab_size,
                                      k=filter_size,
                                      dropout_rate=0.8,
                                      is_training=is_training)
    probabilities = fish_gate.forward(probabilities)

    return {"predictions": probabilities}
class FishMoeModel4(models.BaseModel):
  """A softmax over a mixture of logistic models (with L2 regularization)."""

  def create_model(self,
                   model_input,
                   vocab_size,
                   is_training,
                   num_mixtures=None,
                   l2_penalty=1e-8,
                   filter_size=2,
                   **unused_params):
    """Three dense blocks followed by a FishGate refinement.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      is_training: Is this the training phase ?
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.
      filter_size: expansion factor handed to the FishGate module.
    Returns:
      A dictionary with a tensor containing the probability predictions of
      the model in the 'predictions' key (batch_size x num_classes).
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    l2_penalty = FLAGS.moe_l2

    # Three identical dense blocks (ReLU, ReLU, then sigmoid), each followed
    # by batch norm and — during training — dropout keeping 90% of units.
    hidden = model_input
    for block_activation in (tf.nn.relu, tf.nn.relu, tf.nn.sigmoid):
      hidden = tf.layers.dense(
          hidden, vocab_size, activation=block_activation,
          kernel_regularizer=slim.l2_regularizer(l2_penalty))
      hidden = tf.layers.batch_normalization(hidden, training=is_training)
      if is_training:
        hidden = tf.nn.dropout(hidden, keep_prob=0.9)

    # NOTE(review): `fish_modules` is not imported in this file — presumably a
    # project-local module; confirm the import before using this model.
    fish_gate = fish_modules.FishGate(hidden_size=vocab_size,
                                      k=filter_size,
                                      dropout_rate=0.9,
                                      is_training=is_training)
    probabilities = fish_gate.forward(hidden)

    return {"predictions": probabilities}
class FishMoeModel3(models.BaseModel):
  """A softmax over a mixture of logistic models (with L2 regularization)."""

  def create_model(self,
                   model_input,
                   vocab_size,
                   is_training,
                   num_mixtures=None,
                   l2_penalty=1e-6,
                   filter_size=2,
                   **unused_params):
    """MoE head followed by a residual refinement block over probabilities.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      is_training: Is this the training phase ?
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.
      filter_size: width multiplier of the residual hidden layer.
    Returns:
      A dictionary with a tensor containing the probability predictions of
      the model in the 'predictions' key (batch_size x num_classes).
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    l2_penalty = FLAGS.moe_l2

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates")

    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    probabilities0 = tf.reshape(probabilities_by_class_and_batch,
                                [-1, vocab_size])
    probabilities0 = tf.layers.batch_normalization(probabilities0,
                                                   training=is_training)

    # Residual refinement: expand, BN, dropout, project back.
    r_activation0 = tf.layers.dense(probabilities0, vocab_size * filter_size,
                                    use_bias=True, activation=tf.nn.relu)
    r_activation0 = tf.layers.batch_normalization(r_activation0,
                                                  training=is_training)
    # Bug fix: the original called `tf.layers.dropout(r_activation0, 0.9)`
    # inside `if is_training:` but without `training=True`; tf.layers.dropout
    # defaults to inference mode, so the call was a no-op. Also, 0.9 here is
    # the DROP rate, while sibling models keep 90% of units
    # (tf.nn.dropout keep_prob=0.9) — use rate=0.1 to match that intent.
    r_activation0 = tf.layers.dropout(r_activation0, rate=0.1,
                                      training=is_training)
    r_activation1 = tf.layers.dense(r_activation0, vocab_size,
                                    use_bias=True, activation=None)

    probabilities1 = probabilities0 + r_activation1
    probabilities1 = tf.contrib.layers.layer_norm(probabilities1)
    probabilities1 = tf.layers.batch_normalization(probabilities1,
                                                   training=is_training)
    probabilities2 = tf.layers.dense(probabilities1, vocab_size,
                                     use_bias=True, activation=tf.nn.softmax)

    return {"predictions": probabilities2}
class MoeModel2(models.BaseModel):
    """A softmax over a mixture of logistic models (with L2 regularization)."""

    def create_model(self,
                     model_input,
                     vocab_size,
                     is_training,
                     num_mixtures=None,
                     l2_penalty=1e-8,
                     **unused_params):
        """Creates a Mixture of (Logistic) Experts model.

        It also includes the possibility of gating the probabilities.
        The model consists of a per-class softmax distribution over a
        configurable number of logistic classifiers. One of the classifiers in
        the mixture is not trained, and always predicts 0.

        Args:
          model_input: 'batch_size' x 'num_features' matrix of input features.
          vocab_size: The number of classes in the dataset.
          is_training: Is this the training phase?
          num_mixtures: The number of mixtures (excluding a dummy 'expert' that
            always predicts the non-existence of an entity). Defaults to 3.
          l2_penalty: How much to penalize the squared magnitudes of parameter
            values. NOTE: overridden below by FLAGS.moe_l2.

        Returns:
          A dictionary with a tensor containing the probability predictions of
          the model in the 'predictions' key. The dimensions of the tensor are
          batch_size x num_classes.
        """
        # BUG FIX: the num_mixtures argument was silently ignored (hard-coded
        # to 3). Honor it when supplied; keep 3 as the default.
        num_mixtures = num_mixtures or 3
        low_rank_gating = FLAGS.moe_low_rank_gating
        l2_penalty = FLAGS.moe_l2  # flag value deliberately wins over the arg
        # NOTE(review): these two flags are read but never used in this body —
        # presumably left over from a gated variant; confirm before removing.
        gating_probabilities = FLAGS.moe_prob_gating
        gating_input = FLAGS.moe_prob_gating_input

        if low_rank_gating == -1:
            # Full-rank gating: one projection straight to the gate logits.
            gate_activations = slim.fully_connected(
                model_input,
                vocab_size * (num_mixtures + 1),
                activation_fn=None,
                biases_initializer=None,
                weights_regularizer=slim.l2_regularizer(l2_penalty),
                scope="gates")
        else:
            # Low-rank gating: factor the gate projection through a bottleneck.
            gate_activations1 = slim.fully_connected(
                model_input,
                low_rank_gating,
                activation_fn=None,
                biases_initializer=None,
                weights_regularizer=slim.l2_regularizer(l2_penalty),
                scope="gates1")
            gate_activations = slim.fully_connected(
                gate_activations1,
                vocab_size * (num_mixtures + 1),
                activation_fn=None,
                biases_initializer=None,
                weights_regularizer=slim.l2_regularizer(l2_penalty),
                scope="gates2")

        expert_activations = slim.fully_connected(
            model_input,
            vocab_size * num_mixtures,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="experts")

        gating_distribution = tf.nn.softmax(tf.reshape(
            gate_activations,
            [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
        expert_distribution = tf.nn.sigmoid(tf.reshape(
            expert_activations,
            [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

        # Drop the dummy expert's gate (last column) when mixing.
        probabilities_by_class_and_batch = tf.reduce_sum(
            gating_distribution[:, :num_mixtures] * expert_distribution, 1)
        probabilities = tf.reshape(probabilities_by_class_and_batch,
                                   [-1, vocab_size])

        # Residual refinement head: widen, ReLU, BN, dropout, project back.
        filter1 = tf.layers.dense(probabilities,
                                  vocab_size * 2,
                                  use_bias=True,
                                  activation=tf.nn.relu,
                                  name="v-filter1")
        filter1 = tf.layers.batch_normalization(filter1, training=is_training)

        if is_training:
            filter1 = tf.nn.dropout(filter1, 0.8)

        filter2 = tf.layers.dense(filter1,
                                  vocab_size,
                                  use_bias=False,
                                  activation=None,
                                  name="v-filter2")

        probabilities = probabilities + filter2
        probabilities = tf.nn.relu(probabilities)
        probabilities = tf.layers.batch_normalization(probabilities,
                                                      training=is_training)

        probabilities = tf.layers.dense(probabilities, vocab_size,
                                        use_bias=True,
                                        activation=tf.nn.sigmoid,
                                        name="v-final_output")

        return {"predictions": probabilities}
class JuhanMoeModel(models.BaseModel):
    """A softmax over a mixture of logistic models (with L2 regularization)."""

    def create_model(self,
                     model_input,
                     vocab_size,
                     is_training,
                     num_mixtures=None,
                     l2_penalty=1e-8,
                     **unused_params):
        """Creates a Mixture of (Logistic) Experts model.

        The model consists of a per-class softmax distribution over a
        configurable number of logistic classifiers. One of the classifiers in
        the mixture is not trained, and always predicts 0.

        Args:
          model_input: 'batch_size' x 'num_features' matrix of input features.
          vocab_size: The number of classes in the dataset.
          is_training: Is this the training phase?
          num_mixtures: The number of mixtures (excluding a dummy 'expert' that
            always predicts the non-existence of an entity). Defaults to 3.
          l2_penalty: How much to penalize the squared magnitudes of parameter
            values.

        Returns:
          A dictionary with a tensor containing the probability predictions of
          the model in the 'predictions' key. The dimensions of the tensor are
          batch_size x num_classes.
        """
        # BUG FIX: the num_mixtures argument was silently ignored (hard-coded
        # to 3). Honor it when supplied; keep 3 as the default.
        num_mixtures = num_mixtures or 3

        gate_activations = slim.fully_connected(
            model_input,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates")
        expert_activations = slim.fully_connected(
            model_input,
            vocab_size * num_mixtures,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="experts")

        gating_distribution = tf.nn.softmax(tf.reshape(
            gate_activations,
            [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
        expert_distribution = tf.nn.sigmoid(tf.reshape(
            expert_activations,
            [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

        # Drop the dummy expert's gate (last column) when mixing.
        final_probabilities_by_class_and_batch = tf.reduce_sum(
            gating_distribution[:, :num_mixtures] * expert_distribution, 1)
        probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                   [-1, vocab_size])
        if is_training:
            probabilities = tf.nn.dropout(probabilities, 0.8)

        # Residual refinement head: widen, leaky-ReLU, BN, dropout, project.
        filter1 = tf.layers.dense(probabilities,
                                  vocab_size * 2,
                                  use_bias=True,
                                  activation=tf.nn.leaky_relu,
                                  name="v-filter1")
        filter1 = tf.layers.batch_normalization(filter1, training=is_training)
        if is_training:
            filter1 = tf.nn.dropout(filter1, 0.8)

        filter2 = tf.layers.dense(filter1,
                                  vocab_size,
                                  use_bias=False,
                                  activation=None,
                                  name="v-filter2")

        probabilities = probabilities + filter2
        probabilities = tf.nn.leaky_relu(probabilities)
        probabilities = tf.layers.batch_normalization(probabilities,
                                                      training=is_training)

        probabilities = tf.layers.dense(probabilities, vocab_size,
                                        use_bias=True,
                                        activation=tf.nn.sigmoid,
                                        name="v-final_output")

        return {"predictions": probabilities}
class FourLayerBatchNeuralModel(models.BaseModel):
    """Four stacked fully-connected layers with batch norm.

    The first three layers are FC -> ReLU -> batch norm (bias-free); the
    fourth adds a bias and applies a sigmoid to produce per-class
    probabilities.
    """

    def _fc_relu_bn(self, inputs, in_dim, out_dim, name, is_training):
        """One FC -> ReLU -> batch-norm stanza.

        Variable names, summary tags, and batch-norm scopes exactly match the
        original inline code (``<name>_weights`` / ``<name>_activation_bn``)
        so existing checkpoints keep loading.
        """
        weights = tf.get_variable(name + "_weights",
                                  [in_dim, out_dim],
                                  initializer=tf.contrib.layers.xavier_initializer())
        tf.summary.histogram(name + "_weights", weights)
        activation = tf.nn.relu(tf.matmul(inputs, weights))
        return slim.batch_norm(
            activation,
            center=True,
            scale=True,
            is_training=is_training,
            scope=name + "_activation_bn")

    def create_model(self,
                     model_input,
                     vocab_size,
                     is_training,
                     l2_penalty=1e-7,
                     **unused_params):
        """Builds the four-layer classifier.

        Args:
          model_input: 'batch_size' x 'num_features' matrix of input features.
          vocab_size: The number of classes in the dataset.
          is_training: Is this the training phase?
          l2_penalty: Accepted for interface compatibility but unused — no
            regularizer is attached to any of the layers.

        Returns:
          A dictionary with the 'predictions' tensor of shape
          batch_size x vocab_size (sigmoid probabilities).
        """
        model_input_dim = model_input.get_shape().as_list()[1]

        # Three identical hidden stanzas, refactored into a shared helper.
        fc1_activation = self._fc_relu_bn(model_input, model_input_dim,
                                          vocab_size, "fc1", is_training)
        fc2_activation = self._fc_relu_bn(fc1_activation, vocab_size,
                                          vocab_size, "fc2", is_training)
        fc3_activation = self._fc_relu_bn(fc2_activation, vocab_size,
                                          vocab_size, "fc3", is_training)

        # Output layer: linear projection + bias, then sigmoid (no BN).
        fc4_weights = tf.get_variable("fc4_weights",
                                      [vocab_size, vocab_size],
                                      initializer=tf.contrib.layers.xavier_initializer())
        fc4_activation = tf.matmul(fc3_activation, fc4_weights)
        cluster_biases = tf.get_variable("fc4_bias",
                                         [vocab_size],
                                         initializer=tf.constant_initializer(0.01))
        tf.summary.histogram("fc4_bias", cluster_biases)
        fc4_activation += cluster_biases

        fc4_activation = tf.sigmoid(fc4_activation)

        return {"predictions": fc4_activation}


class ClassLearningThreeNnModel(models.BaseModel):
    """Three FC layers with layer norm, leaky-ReLU, and dropout."""

    def create_model(self,
                     model_input,
                     vocab_size,
                     is_training,
                     l2_penalty=1e-8,
                     ortho_reg=0,
                     **unused_params):
        """Builds the three-layer classifier.

        Args:
          model_input: 'batch_size' x 'num_features' matrix of input features.
          vocab_size: The number of classes in the dataset.
          is_training: Is this the training phase? Enables dropout.
          l2_penalty: L2 weight-regularization strength for every layer.
          ortho_reg: Extra regularization term passed through unchanged as
            'regularization_loss'.

        Returns:
          A dictionary with 'predictions' (batch_size x vocab_size sigmoid
          probabilities) and 'regularization_loss'.
        """
        fc1 = slim.fully_connected(
            model_input, vocab_size, activation_fn=None, biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty))
        fc1 = tf.contrib.layers.layer_norm(inputs=fc1, center=True, scale=True,
                                           activation_fn=tf.nn.leaky_relu)
        if is_training:
            fc1 = tf.nn.dropout(fc1, keep_prob=0.5)

        fc2 = slim.fully_connected(
            fc1, vocab_size, activation_fn=None, biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty))
        fc2 = tf.contrib.layers.layer_norm(inputs=fc2, center=True, scale=True,
                                           activation_fn=tf.nn.leaky_relu)
        if is_training:
            fc2 = tf.nn.dropout(fc2, keep_prob=0.5)

        # Output layer: sigmoid head with a small positive bias init.
        fc3 = slim.fully_connected(
            fc2, vocab_size, activation_fn=tf.nn.sigmoid,
            biases_initializer=tf.constant_initializer(0.1),
            weights_regularizer=slim.l2_regularizer(l2_penalty))

        return {"predictions": fc3,
                "regularization_loss": ortho_reg}
class ClassLearningFourNnModel(models.BaseModel):
    """Four FC layers: three layer-normed leaky-ReLU blocks plus a sigmoid head."""

    def create_model(self,
                     model_input,
                     vocab_size,
                     is_training,
                     l2_penalty=1e-8,
                     ortho_reg=0,
                     **unused_params):
        """Builds the four-layer classifier.

        Args:
          model_input: 'batch_size' x 'num_features' matrix of input features.
          vocab_size: The number of classes in the dataset.
          is_training: Is this the training phase? (Unused here — no dropout.)
          l2_penalty: L2 weight-regularization strength for every layer.
          ortho_reg: Extra regularization term passed through unchanged as
            'regularization_loss'.

        Returns:
          A dictionary with 'predictions' (batch_size x vocab_size sigmoid
          probabilities) and 'regularization_loss'.
        """
        # Three identical hidden blocks: bias-free linear projection followed
        # by layer norm with a leaky-ReLU activation. slim auto-names the
        # layers sequentially, so the loop keeps the original variable names.
        net = model_input
        for _ in range(3):
            net = slim.fully_connected(
                net, vocab_size, activation_fn=None, biases_initializer=None,
                weights_regularizer=slim.l2_regularizer(l2_penalty))
            net = tf.contrib.layers.layer_norm(
                inputs=net, center=True, scale=True,
                activation_fn=tf.nn.leaky_relu)

        # Output layer: sigmoid head with a small positive bias init.
        predictions = slim.fully_connected(
            net, vocab_size, activation_fn=tf.nn.sigmoid,
            biases_initializer=tf.constant_initializer(0.1),
            weights_regularizer=slim.l2_regularizer(l2_penalty))

        return {"predictions": predictions,
                "regularization_loss": ortho_reg}