├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── labeler.yml └── workflows │ ├── ci.yml │ └── label.yml ├── .gitignore ├── LICENSE ├── README.md ├── REFERENCES.md ├── backend.Dockerfile ├── dataset ├── test_videodatainfo_2017.json ├── videodatainfo_2017.json └── videos │ └── README.md ├── dataset_cache └── README.md ├── docker-compose.yml ├── docker_build.sh ├── docker_hub.sh ├── docker_test.sh ├── early_tests.sh ├── environment.yml ├── f5c22f7_images ├── 10802.gif ├── 12501.gif ├── 12589.gif ├── 12683.gif ├── 12727.gif ├── 12901.gif ├── 12908.gif ├── 12937.gif ├── 12939.gif ├── 12966.gif ├── 12968.gif └── 12994.gif ├── frontend.Dockerfile ├── images ├── attention.png ├── model_audio.png ├── model_video.png ├── model_word.png └── sentence_model.png ├── keep_alive.sh ├── models └── README.md ├── requirements.txt ├── src ├── __init__.py ├── backend │ ├── CombinedResults │ │ ├── calculate_score_results.sh │ │ ├── calculate_total_score_json.py │ │ ├── find_total_sentences_unique.py │ │ └── summary.sh │ ├── __init__.py │ ├── data.py │ ├── framework.py │ ├── model.py │ ├── parser.py │ ├── plotepochlog.py │ ├── pred.sh │ ├── preprocess.py │ ├── run.sh │ ├── test_on_trained.sh │ ├── train.py │ ├── utils.py │ ├── videohandler.py │ ├── vocab.py │ └── vpreprocess.py ├── common │ ├── __init__.py │ ├── config.py │ ├── logger.py │ ├── rpc.py │ └── status.py ├── config.json ├── config_docker.json ├── frontend │ ├── __init__.py │ ├── app.py │ └── templates │ │ ├── get_ids.html │ │ ├── index.html │ │ ├── layout.html │ │ ├── play.html │ │ ├── predict.html │ │ └── publicindex.html ├── run_tests.sh └── tests │ ├── __init__.py │ ├── data │ ├── __init__.py │ ├── fetcher.py │ ├── test_fetcher.py │ └── videos │ │ ├── .content │ │ ├── 12727.mp4 │ │ └── 12968.mp4 │ └── env │ ├── __init__.py │ ├── test_config.py │ └── test_video_parsing.py ├── tests └── e2e │ ├── __init__.py │ └── test_external.py └── uploads └── touched /.dockerignore: -------------------------------------------------------------------------------- 1 | .git -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Sample video** 24 | Provide a sample video (if applicable). 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Desktop (please complete the following information):** 30 | - OS: [e.g. iOS] 31 | - Docker Version 32 | - Docker Compose Version 33 | - Browser [e.g. chrome, safari] 34 | - Version [e.g. 22] 35 | 36 | **Additional context** 37 | Add any other context about the problem here. 
38 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | env: 2 | - Dockerfile 3 | - environment.yml 4 | - requirements.txt 5 | 6 | config: 7 | - src/config.json 8 | - src/config_docker.json 9 | 10 | test: 11 | - tests/* 12 | - tests/**/* 13 | - src/tests/* 14 | - src/tests/**/* 15 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - VideoCaption 7 | - actions 8 | - speedo 9 | 10 | pull_request: 11 | branches: 12 | - VideoCaption 13 | 14 | schedule: 15 | - cron: "11 7 1 * *" 16 | 17 | workflow_dispatch: 18 | 19 | jobs: 20 | build: 21 | runs-on: ubuntu-latest 22 | strategy: 23 | matrix: 24 | python-version: [2.7] 25 | 26 | steps: 27 | - uses: actions/checkout@v2 28 | 29 | - name: Docker Pull 30 | run: | 31 | bash docker_hub.sh pull 32 | 33 | - name: Install dependencies 34 | run: | 35 | pip install -r requirements.txt 36 | bash early_tests.sh 37 | sudo apt-get install firefox 38 | wget 'https://github.com/mozilla/geckodriver/releases/download/v0.30.0/geckodriver-v0.30.0-linux64.tar.gz' 39 | tar -xvzf geckodriver-v0.30.0-linux64.tar.gz 40 | sudo mv geckodriver /usr/local/bin 41 | 42 | - name: Build 43 | run: | 44 | bash docker_build.sh 45 | 46 | - name: Test 47 | run: | 48 | bash docker_test.sh 49 | 50 | # Push the image to Docker Hub on 'push' and 'workflow_dispatch' events only. 51 | - name: Deploy to Docker Hub 52 | if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }} 53 | env: 54 | DOCKER_USERNAME: "scopeinfinity" 55 | DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} 56 | run: | 57 | { echo "$DOCKER_TOKEN" | docker login -u "$DOCKER_USERNAME" --password-stdin ; } && bash docker_hub.sh push 58 | -------------------------------------------------------------------------------- /.github/workflows/label.yml: -------------------------------------------------------------------------------- 1 | # This workflow will triage pull requests and apply a label based on the 2 | # paths that are modified in the pull request. 3 | # 4 | # To use this workflow, you will need to set up a .github/labeler.yml 5 | # file with configuration.
For more information, see: 6 | # https://github.com/actions/labeler/blob/master/README.md 7 | 8 | name: Labeler 9 | on: [pull_request] 10 | 11 | jobs: 12 | label: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/labeler@v2 18 | with: 19 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Other Git Projects 2 | FFmpeg/ 3 | 4 | # Learning Session Files 5 | *_state.txt 6 | *_model.dat 7 | *_logs.txt 8 | *_model.dat_*_loss_* 9 | *_logs_epoch_.txt 10 | *_logs_.txt 11 | model_*.dat* 12 | state_*.txt 13 | 14 | # Local Files to ignore 15 | display 16 | *.mp4 17 | *.mp4_ 18 | 19 | # Backup Files 20 | *~ 21 | *# 22 | .#* 23 | 24 | # Files taken from other source 25 | 26 | glove/* 27 | models/* 28 | 29 | # Dynamically Generated Files 30 | vocab.dat 31 | glove.dat 32 | log.txt 33 | 34 | # Byte-compiled / optimized / DLL files 35 | __pycache__/ 36 | *.py[cod] 37 | *$py.class 38 | 39 | # C extensions 40 | *.so 41 | 42 | # Distribution / packaging 43 | .Python 44 | env/ 45 | build/ 46 | develop-eggs/ 47 | dist/ 48 | downloads/ 49 | eggs/ 50 | .eggs/ 51 | lib/ 52 | lib64/ 53 | parts/ 54 | sdist/ 55 | var/ 56 | wheels/ 57 | *.egg-info/ 58 | .installed.cfg 59 | *.egg 60 | 61 | # PyInstaller 62 | # Usually these files are written by a python script from a template 63 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 64 | *.manifest 65 | *.spec 66 | 67 | # Installer logs 68 | pip-log.txt 69 | pip-delete-this-directory.txt 70 | 71 | # Unit test / coverage reports 72 | htmlcov/ 73 | .tox/ 74 | .coverage 75 | .coverage.* 76 | .cache 77 | nosetests.xml 78 | coverage.xml 79 | *.cover 80 | .hypothesis/ 81 | 82 | # Translations 83 | *.mo 84 | *.pot 85 | 86 | # Django stuff: 87 | *.log 88 | local_settings.py 89 | 90 | # Flask stuff: 91 | instance/ 92 | .webassets-cache 93 | 94 | # Scrapy stuff: 95 | .scrapy 96 | 97 | # Sphinx documentation 98 | docs/_build/ 99 | 100 | # PyBuilder 101 | target/ 102 | 103 | # Jupyter Notebook 104 | .ipynb_checkpoints 105 | 106 | # pyenv 107 | .python-version 108 | 109 | # celery beat schedule file 110 | celerybeat-schedule 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # dotenv 116 | .env 117 | 118 | # virtualenv 119 | .venv 120 | venv/ 121 | ENV/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Video Captioning [](https://travis-ci.com/scopeInfinity/Video2Description) [](https://hits.seeyoufarm.com) 2 | Generate a caption for a given video clip 3 | 4 | Branch : [VideoCaption](https://github.com/scopeInfinity/Video2Description/tree/VideoCaption) (1a2124d), [VideoCaption_catt](https://github.com/scopeInfinity/Video2Description/tree/VideoCaption_catt) (647e73b4) 5 | 6 | ### Model 7 | 8 | The model generates a natural-language sentence word by word 9 | 10 |  11 | 12 | | Audio SubModel | Video SubModel | Sentence Generation SubModel | 13 | | :-------------: |:-------------:| :-----:| 14 | | ![audio_model][audio_model]| ![video_model][video_model] | ![sentence_generation][sentence_generation] 15 | 16 | [audio_model]: https://github.com/scopeInfinity/Video2Description/raw/VideoCaption/images/model_audio.png 17 | [video_model]: https://github.com/scopeInfinity/Video2Description/raw/VideoCaption/images/model_video.png 18 | [sentence_generation]: https://github.com/scopeInfinity/Video2Description/raw/VideoCaption/images/model_word.png 19 | 20 | Context extraction for the Temporal Attention Model, at the i-th word generation step 21 | 22 |  23 | 24 | 25 | ### Results - *f5c22f7* 26 | 27 | Test videos with good results 28 | 29 | | | | | 30 | | :-------------: |:-------------:| :-----:| 31 | | ![12727][12727]| ![12501][12501] | ![10802][10802] 32 | | two men are talking about a cooking show | a woman is cooking | a dog is running around a field | 33 | | ![12968][12968] | ![12937][12937] | ![12939][12939] 34 | | a woman is talking about a makeup face | a man is driving a car down the road | a man is cooking in a kitchen 35 | | ![12683][12683] | ![12901][12901] | ![12994][12994] 36 | | a man is playing a video game | two men are playing table tennis in a stadium | a man is talking about a computer program 37 | 38 | 39 | Test videos with poor results 40 | 41 | | | | | 42 | | :-------------: |:-------------:| :-----:| 43 | | ![12589][12589]| ![12966][12966] | ![12908][12908] 44 | | a person is playing with a toy | a man is walking on the field | a man is standing in a gym | 45 | 46 | [12727]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12727.gif 47 | [12501]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12501.gif 48 | [10802]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/10802.gif 49 | 50 | [12968]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12968.gif 51 | [12937]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12937.gif 52 | [12939]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12939.gif 53 | 54 | [12683]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12683.gif 55 |
[12901]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12901.gif 56 | [12994]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12994.gif 57 | 58 | 59 | [12589]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12589.gif 60 | [12966]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12966.gif 61 | [12908]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12908.gif 62 | 63 | 64 | ### Try it out!!! 65 | * Please feel free to raise a PR with necessary suggestions. 66 | * Clone the repository 67 | * `git clone https://github.com/scopeInfinity/Video2Description.git` 68 | * Install docker and docker-compose 69 | * The current config uses docker-compose file format '3.2'. 70 | * https://github.com/docker/compose/releases 71 | * ```bash 72 | sudo apt-get install docker.io 73 | sudo curl -L "https://github.com/docker/compose/releases/download/1.25.4/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose 74 | sudo chmod +x /usr/local/bin/docker-compose 75 | ``` 76 | * docs 77 | * https://docs.docker.com/install/linux/docker-ce/ubuntu/ 78 | * https://docs.docker.com/compose/install/ 79 | 80 | * Pull the prebuilt images and run the containers 81 | ```bash 82 | $ docker-compose pull 83 | $ docker-compose up 84 | ``` 85 | * Browse to `http://localhost:8080/` 86 | * The backend might take a few minutes to reach a stable state. 87 | 88 | ##### Execution without Docker 89 | * We can always go through `backend.Dockerfile` and `frontend.Dockerfile` to understand the setup better. 90 | * Update `src/config.json` as per the requirements and use those paths during the upcoming steps. 91 | * To know more about any field, just search for its references in the codebase. 92 | * Install miniconda 93 | * https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html 94 | * Get `glove.6B.300d.txt` from `https://nlp.stanford.edu/projects/glove/` 95 | * Install ffmpeg 96 | * Configure, build and install ffmpeg from source with shared libraries 97 | ```bash 98 | $ git clone 'https://github.com/FFmpeg/FFmpeg.git' 99 | $ cd FFmpeg 100 | $ ./configure --enable-shared # Use --prefix if you need to install into a custom directory 101 | $ make 102 | # make install 103 | ``` 104 | * If required, use `https://github.com/tylin/coco-caption/` for scoring the model. 105 | * Then create the conda environment using `environment.yml` 106 | * `$ conda env create -f environment.yml` 107 | * And activate the environment 108 | ``` 109 | $ conda activate V2D 110 | ``` 111 | * Start the backend 112 | * `src$ python -m backend.parser server --start --model /path/to/model` 113 | * Start the web frontend 114 | * `src$ python -m frontend.app` 115 | 116 | ### Info 117 | 118 | Data Directory and Working Directory can be the same as the project root directory.
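For illustration only, here is a rough sketch of how these directories surface in code. The `get_app_config`/`get_vpreprocess_config` helpers come from `src/common/config.py`, and the `DATASET_CACHE` and `COCOFNAME` keys are the ones read in `src/backend/framework.py`; treating `V2D_CONFIG_FILE` as the switch between `config.json` and `config_docker.json` is an assumption based on the `ENV V2D_CONFIG_FILE=...` lines in the Dockerfiles.

```python
# Illustrative sketch; run from src/. Key names are taken from
# src/backend/framework.py; the V2D_CONFIG_FILE default below is an
# assumption based on the Dockerfiles (config_docker.json inside Docker).
import os
os.environ.setdefault("V2D_CONFIG_FILE", "config.json")

from common.config import get_app_config, get_vpreprocess_config

print(get_app_config()["DATASET_CACHE"])      # cache dir for weights/state/log files
print(get_vpreprocess_config()["COCOFNAME"])  # presumably the coco-caption path used for scoring
```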
119 | 120 | ### Data Directory 121 | File | Reference 122 | --- | --- 123 | */path/to/data_dir/VideoDataset/videodatainfo_2017.json* | http://ms-multimedia-challenge.com/2017/dataset 124 | */path/to/data_dir/VideoDataset/videos/[0-9]+.mp4* | Download videos based on the above dataset 125 | */path/to/data_dir/glove/glove.6B.300d.txt* | https://nlp.stanford.edu/projects/glove/ 126 | */path/to/data_dir/VideoDataset/cache_40_224x224/[0-9]+.npy* | Video cache files will be created on the fly 127 | 128 | ### Working Directory 129 | File | Content 130 | --- | --- 131 | */path/to/working_dir/glove.dat* | Pickle-dumped GloVe embedding 132 | */path/to/working_dir/vocab.dat* | Pickle-dumped vocabulary words 133 | 134 | ### Download Dataset 135 | * Execute `python videohandler.py` from the *VideoDataset* directory 136 | 137 | ### Execution 138 | It currently supports train, predict and server modes. Please use the following command for a detailed explanation. 139 | ```bash 140 | src$ python -m backend.parser -h 141 | ``` 142 | 143 | ### Training Methods 144 | 145 | * Try Iterative Learning 146 | * Try Random Learning 147 | 148 | ### Evaluation 149 | 150 | #### Prerequisite 151 | ```bash 152 | cd /path/to/eval_dir/ 153 | git clone 'https://github.com/tylin/coco-caption.git' cococaption 154 | ln /path/to/working_dir/cocoeval.py cococaption/ 155 | ``` 156 | #### Evaluate 157 | ```bash 158 | # One can edit parser.py to change the number of test examples considered in evaluation 159 | python parser.py predict save_all_test 160 | python /path/to/eval_dir/cocoeval.py <results_file>.txt 161 | ``` 162 | 163 | #### Sample Evaluation while training 164 | 165 | Commit | Training | Total | CIDEr | Bleu_4 | ROUGE_L | METEOR | Model Filename 166 | --- | --- | --- | --- | --- | --- | --- | --- 167 | 647e73b4 | 10 epochs | 1.1642 | 0.1580 | 0.3090 | 0.4917 | 0.2055 | CAttention_ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4990_loss_2.484_Cider0.360_Blue0.369_Rouge0.580_Meteor0.256 168 | 1a2124d | 17 epochs | 1.1599 | 0.1654 | 0.3022 | 0.4849 | 0.2074 | ResNet_D512L512_G128G64_D1024D0.20BN_BDLSTM1024_D0.2L1024DVS_model.dat_4987_loss_2.203_Cider0.342_Blue0.353_Rouge0.572_Meteor0.256 169 | f5c22f7 | 17 epochs | 1.1559 | 0.1680 | 0.3000 | 0.4832 | 0.2047 | ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4983_loss_2.350_Cider0.355_Blue0.353_Rouge0.571_Meteor0.247_TOTAL_1.558_BEST 170 | bd072ac | 11 CPUhrs with Multiprocessing (16 epochs) | 1.0736 | 0.1528 | 0.2597 | 0.4674 | 0.1936 | ResNet_D512L512_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4986_loss_2.306_Cider0.347_Blue0.328_Rouge0.560_Meteor0.246 171 | 3ccf5d5 | 15 CPUhrs | 1.0307 | 0.1258 | 0.2535 | 0.4619 | 0.1895 | res_mcnn_rand_b100_s500_model.dat_model1_3ccf5d5 172 | 173 | Check the `Specifications` section for a model comparison. 174 | 175 | 176 | The temporal attention model is on the `VideoCaption_catt` branch. 177 | 178 | Pre-trained Models : https://drive.google.com/open?id=1gexBRQfrjfcs7N5UI5NtlLiIR_xa69tK 179 | 180 | ### Web Server 181 | 182 | - Start the server **(S)** to compute predictions (within the conda environment) 183 | ```bash 184 | python parser.py server -s -m <path/to/model> 185 | ``` 186 | - Check `config.json` for configurations.
187 | - Execute `python app.py` from the webserver directory (no need for the conda environment) 188 | - Make sure the process can create new files inside `$UPLOAD_FOLDER` 189 | - Open `http://webserver:5000/` to access the web server for testing (under the default configuration) 190 | 191 | ### Specifications 192 | 193 | ##### Commit: 3ccf5d5 194 | - ResNet over LSTM for feature extraction 195 | - Word by Word generation based on last prediction for Sentence Generation using LSTM 196 | - Random Dataset Learning of training data 197 | - Vocab Size 9448 198 | - Glove of 300 Dimension 199 | 200 | ##### Commit: bd072ac 201 | - ResNet over BiDirection GRU for feature extraction 202 | - Sequential Learning of training data 203 | - Batch Normalization + Few more tweaks in Model 204 | - Bleu, CIDEr, Rouge, Meteor score generation for validation 205 | - Multiprocessing keras 206 | 207 | ##### Commit: f5c22f7 208 | - Audio with BiDirection GRU 209 | 210 | ##### Commit: 1a2124d 211 | - Audio with BiDirection LSTM 212 | 213 | ##### Commit: 647e73b 214 | - Audio with BiDirection GRU using temporal attention for context 215 | 216 | # Image Captioning 217 | Generate captions for the given images 218 | 219 | Branch : [onehot_gen](https://github.com/scopeInfinity/Video2Description/tree/onehot_gen) 220 | 221 | Commit : [898f15778d40b67f333df0a0e744a4af0b04b16c](https://github.com/scopeInfinity/Video2Description/commit/898f15778d40b67f333df0a0e744a4af0b04b16c) 222 | 223 | Trained Model : https://drive.google.com/open?id=1qzMCAbh_tW3SjMMVSPS4Ikt6hDnGfhEN 224 | 225 | Categorical Crossentropy Loss : 0.58 226 | 227 | -------------------------------------------------------------------------------- /REFERENCES.md: -------------------------------------------------------------------------------- 1 | [1] J. Xu, T. Mei, T. Yao, Y. Rui. MSR-VTT: A Large Video Description Dataset for 2 | Bridging Video and Language. In Proceedings of CVPR, 2016. 3 | 4 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 5 | Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer 6 | Vision and Pattern Recognition (CVPR) 7 | 8 | [3] Jeffrey Pennington, Richard Socher, and Christopher D. Manning. GloVe: Global 9 | Vectors for Word Representation (2014) 10 | 11 | [4] McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric 12 | Battenberg, and Oriol Nieto. librosa: Audio and music signal analysis in python. In 13 | Proceedings of the 14th python in science conference, pp. 18-25. 2015 14 | 15 | [5] Oriol Vinyals, Alexander Toshev, Samy Bengio, Dumitru Erhan. Show and Tell: A 16 | Neural Image Caption Generator (April 2015) 17 | 18 | [6] Jeff Donahue, Lisa Anne Hendricks, Marcus Rohrbach, Subhashini Venugopalan, Sergio 19 | Guadarrama, Kate Saenko, Trevor Darrell. Long-term Recurrent Convolutional Networks 20 | for Visual Recognition and Description (Nov 2014) 21 | 22 | [7] Subhashini Venugopalan, Marcus Rohrbach, Jeff Donahue, Raymond Mooney, Trevor 23 | Darrell, and Kate Saenko. Sequence to Sequence Video to Text (May 24 | 2015) 25 | 26 | [8] Ilya Sutskever, Oriol Vinyals, Quoc V. Le. Sequence to Sequence Learning with Neural 27 | Networks (Sep 2014) 28 | 29 | [9] Ramakrishna Vedantam, C.
Lawrence Zitnick, and Devi Parikh. CIDEr: Consensus-based 30 | Image Description Evaluation (The Computer Vision Foundation, 2015) 31 | 32 | [10] Alon Lavie and Michael Denkowski. The METEOR Metric for Automatic Evaluation 33 | of Machine Translation. Machine Translation, 2010 34 | 35 | [11] Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. BLEU: a method for 36 | automatic evaluation of machine translation. ACL ’02 Proceedings of the 40th Annual 37 | Meeting on Association for Computational Linguistics, Pages 311-318 (2002) 38 | 39 | [12] Lin, C.Y. ROUGE: A package for automatic evaluation of summaries. In Text 40 | Summarization Branches Out: Proceedings of the ACL-04 Workshop (pp. 74-81) (2004, July) 41 | -------------------------------------------------------------------------------- /backend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:xenial as my_base 2 | RUN apt-get update 3 | RUN apt-get install -y libsamplerate0 curl libsndfile1 pkg-config nasm wget zip 4 | 5 | FROM my_base as ffmpeg_builder 6 | WORKDIR /tmp 7 | RUN wget https://github.com/FFmpeg/FFmpeg/archive/master.zip -O ffmpeg.zip 8 | RUN unzip ffmpeg.zip 9 | RUN rm ffmpeg.zip 10 | WORKDIR /tmp/FFmpeg-master/ 11 | RUN ./configure --enable-shared 12 | RUN make -j32 13 | 14 | 15 | FROM my_base as glove_builder 16 | WORKDIR /tmp 17 | # https://nlp.stanford.edu/projects/glove/ 18 | RUN wget http://nlp.stanford.edu/data/glove.6B.zip && \ 19 | unzip glove.6B.zip glove.6B.300d.txt && \ 20 | rm glove.6B.zip 21 | 22 | 23 | FROM my_base as deploy 24 | # FROM conda/miniconda2 25 | RUN apt-get update 26 | RUN apt-get install -y libsamplerate0 curl libsndfile1 pkg-config nasm wget zip 27 | RUN useradd -m -s /bin/bash si 28 | RUN mkdir /var/log/v2d 29 | RUN chown si:si /var/log/v2d 30 | RUN chmod 700 /var/log/v2d 31 | USER si 32 | 33 | # Installing miniconda 34 | RUN wget -N https://repo.anaconda.com/miniconda/Miniconda2-latest-Linux-x86_64.sh -O /tmp/Miniconda2-latest-Linux-x86_64.sh 35 | RUN bash /tmp/Miniconda2-latest-Linux-x86_64.sh -b 36 | RUN rm /tmp/Miniconda2-latest-Linux-x86_64.sh 37 | USER root 38 | RUN ln -s /home/si/miniconda2/bin/conda /usr/bin/ 39 | USER si 40 | 41 | # glove 42 | RUN mkdir -p /home/si/v2d/dataset 43 | WORKDIR /home/si/v2d/dataset 44 | COPY --from=glove_builder /tmp/glove.6B.300d.txt /home/si/v2d/dataset/glove.6B.300d.txt 45 | 46 | # ffmpeg build and install 47 | COPY --from=ffmpeg_builder /tmp/FFmpeg-master/ /tmp/FFmpeg-master/ 48 | WORKDIR /tmp/FFmpeg-master/ 49 | USER root 50 | RUN make install 51 | USER si 52 | RUN echo 'export LD_LIBRARY_PATH=/usr/local/lib' >> /home/si/.bashrc 53 | 54 | # coco-caption 55 | WORKDIR /home/si 56 | RUN wget -N 'https://github.com/tylin/coco-caption/archive/master.zip' -O coco.zip && \ 57 | unzip coco.zip && \ 58 | mv coco-caption-master coco-caption && \ 59 | rm coco.zip 60 | 61 | # Create conda environment 62 | # Note: ffmpeg with --enable-shared should be before installing opencv 63 | WORKDIR /home/si/v2d/ 64 | COPY --chown=si:si environment.yml /home/si/v2d/ 65 | RUN conda env create -f environment.yml 66 | RUN conda init bash 67 | RUN echo "conda activate V2D" >> /home/si/.bashrc 68 | 69 | # Prepare basic files 70 | ENV V2D_CONFIG_FILE=config_docker.json 71 | RUN mkdir -p /home/si/v2d/dataset 72 | RUN mkdir -p /home/si/v2d/dataset_cache 73 | RUN mkdir -p /home/si/v2d/models 74 | RUN mkdir -p /tmp/v2d/app/uploads 75 | COPY --chown=si:si dataset/videodatainfo_2017.json /home/si/v2d/dataset/ 76 | COPY
--chown=si:si dataset/test_videodatainfo_2017.json /home/si/v2d/dataset/ 77 | COPY --chown=si:si src/ /home/si/v2d/src/ 78 | WORKDIR /home/si/v2d/src 79 | 80 | # Prepares cache for pretrained model 81 | COPY --chown=si:si models/ /home/si/v2d/models/ 82 | WORKDIR /home/si/v2d/models/ 83 | RUN wget -q -N 'https://github.com/scopeInfinity/Video2Description/releases/download/models/ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4983_loss_2.350_Cider0.355_Blue0.353_Rouge0.571_Meteor0.247_TOTAL_1.558_BEST' 84 | RUN echo "Available Models:" 85 | RUN ls -1 /home/si/v2d/models 86 | 87 | WORKDIR /home/si/v2d/src/ 88 | RUN conda run -n V2D python -m backend.parser server --init-only -m /home/si/v2d/models/ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4983_loss_2.350_Cider0.355_Blue0.353_Rouge0.571_Meteor0.247_TOTAL_1.558_BEST 89 | -------------------------------------------------------------------------------- /dataset/videos/README.md: -------------------------------------------------------------------------------- 1 | Download dataset videos here -------------------------------------------------------------------------------- /dataset_cache/README.md: -------------------------------------------------------------------------------- 1 | Directory to store dataset cache 2 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.2' 2 | services: 3 | backend: 4 | image: scopeinfinity/video2description:deploy 5 | environment: 6 | V2D_CONFIG_FILE: config_docker.json 7 | networks: 8 | internal: 9 | ipv4_address: 172.14.0.2 10 | volumes: 11 | - uploads:/home/si/v2d/uploads/:ro 12 | - ./src:/home/si/v2d/src/:ro 13 | entrypoint: /bin/bash -i -c 'python -m backend.parser server -s -m /home/si/v2d/models/ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4983_loss_2.350_Cider0.355_Blue0.353_Rouge0.571_Meteor0.247_TOTAL_1.558_BEST 2>&1' 14 | 15 | frontend: 16 | build: 17 | context: . 18 | dockerfile: frontend.Dockerfile 19 | image: scopeinfinity/video2description:frontend 20 | ports: 21 | - "8080:5000" 22 | environment: 23 | V2D_CONFIG_FILE: config_docker.json 24 | networks: 25 | internal: 26 | ipv4_address: 172.14.0.3 27 | volumes: 28 | - uploads:/home/si/v2d/uploads/ 29 | - ./src:/home/si/v2d/src/:ro 30 | entrypoint: /bin/bash -c 'python -m frontend.app 2>&1' 31 | 32 | volumes: 33 | uploads: 34 | 35 | networks: 36 | internal: 37 | ipam: 38 | driver: default 39 | config: 40 | - subnet: "172.14.0.0/24" -------------------------------------------------------------------------------- /docker_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | remote="scopeinfinity/video2description" 3 | for file_tag in "backend.Dockerfile ffmpeg_builder" "backend.Dockerfile glove_builder" "backend.Dockerfile deploy" "frontend.Dockerfile frontend"; do 4 | set -- $file_tag 5 | docker build --target $2 -t $remote:$2 --cache-from $remote:$2 --build-arg BUILDKIT_INLINE_CACHE=1 -f $1 .
6 | done 7 | -------------------------------------------------------------------------------- /docker_hub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker $1 scopeinfinity/video2description:frontend 3 | docker $1 scopeinfinity/video2description:ffmpeg_builder 4 | docker $1 scopeinfinity/video2description:glove_builder 5 | docker $1 scopeinfinity/video2description:deploy -------------------------------------------------------------------------------- /docker_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | trap 'kill $(jobs -p) || echo "No background jobs"' EXIT 5 | 6 | TIMEOUT_WAIT_FOR_BACKEND=${1:-5} # in minutes 7 | 8 | echo "[docker][backend] ./run_tests.sh" 9 | docker container run scopeinfinity/video2description:deploy conda run -n V2D /bin/bash -c 'cd /home/si/v2d/src/ && ./run_tests.sh' 10 | 11 | docker-compose up --detach 12 | docker-compose logs -f & 13 | 14 | for x in `seq ${TIMEOUT_WAIT_FOR_BACKEND}`;do 15 | sleep "1m"; 16 | curl "http://localhost:8080/model_weights_status" 2>&1 | tee /dev/stderr | grep -q '\[SUCCESS\]' && break; 17 | done 2>&1 || { echo "Backend model_weights_status failed to reach SUCCESS"; exit 1; } 18 | echo "Backend model_weights_status: SUCCESS" 19 | 20 | 21 | # Run tests external to docker 22 | echo "[external] Executing tests on [docker][deploy]" 23 | python -m unittest discover tests/ 24 | 25 | docker-compose down -------------------------------------------------------------------------------- /early_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Execute light tests that can run before setting up the environment, to save time and resources.
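# Usage sketch (as wired into the 'Install dependencies' step of
# .github/workflows/ci.yml, before the Docker images are built):
#   bash early_tests.sh
# run from the repository root, assuming a suitable Python is on PATH.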
4 | cd src/ 5 | python -m unittest tests.env.test_config -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: V2D 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - pip 7 | - python=2.7 8 | - librosa 9 | - pip: 10 | - six 11 | - opencv-python-headless==4.1.1.26 12 | - numpy 13 | - flask 14 | - matplotlib 15 | - pylint 16 | - h5py<3.0.0 17 | - gtk2 18 | - urllib3 19 | - waitress 20 | - keras==2.0.8 21 | - tensorflow==1.2.1 22 | -------------------------------------------------------------------------------- /f5c22f7_images/10802.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/10802.gif -------------------------------------------------------------------------------- /f5c22f7_images/12501.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12501.gif -------------------------------------------------------------------------------- /f5c22f7_images/12589.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12589.gif -------------------------------------------------------------------------------- /f5c22f7_images/12683.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12683.gif -------------------------------------------------------------------------------- /f5c22f7_images/12727.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12727.gif -------------------------------------------------------------------------------- /f5c22f7_images/12901.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12901.gif -------------------------------------------------------------------------------- /f5c22f7_images/12908.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12908.gif -------------------------------------------------------------------------------- /f5c22f7_images/12937.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12937.gif -------------------------------------------------------------------------------- /f5c22f7_images/12939.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12939.gif 
-------------------------------------------------------------------------------- /f5c22f7_images/12966.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12966.gif -------------------------------------------------------------------------------- /f5c22f7_images/12968.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12968.gif -------------------------------------------------------------------------------- /f5c22f7_images/12994.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12994.gif -------------------------------------------------------------------------------- /frontend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2 as frontend 2 | RUN pip install enum34 flask waitress 3 | 4 | RUN useradd -m -s /bin/bash si 5 | RUN mkdir -p /home/si/v2d/uploads 6 | RUN chown si:si /home/si/v2d/uploads 7 | USER si 8 | 9 | # Prepare basic files 10 | ENV V2D_CONFIG_FILE=config_docker.json 11 | RUN mkdir -p /tmp/v2d/app/uploads 12 | COPY --chown=si:si src/frontend /home/si/v2d/src/frontend/ 13 | COPY --chown=si:si src/common /home/si/v2d/src/common/ 14 | COPY --chown=si:si src/*.json /home/si/v2d/src/ 15 | COPY --chown=si:si src/__init__.py /home/si/v2d/src/__init__.py 16 | WORKDIR /home/si/v2d/src -------------------------------------------------------------------------------- /images/attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/attention.png -------------------------------------------------------------------------------- /images/model_audio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/model_audio.png -------------------------------------------------------------------------------- /images/model_video.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/model_video.png -------------------------------------------------------------------------------- /images/model_word.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/model_word.png -------------------------------------------------------------------------------- /images/sentence_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/sentence_model.png -------------------------------------------------------------------------------- /keep_alive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Keep streaming something every 
minute, for up to X minutes or until the job completes, whichever comes first. 3 | TIMEOUT="${1:?}" 4 | EXEC="${2:?}" 5 | shift 2 6 | timeout "${TIMEOUT}m" bash -c 'while true;do echo "Time: $(date)"; sleep 1m;done;' & 7 | TIMER_PID="$!" 8 | (timeout "${TIMEOUT}m" $EXEC "$@";kill $TIMER_PID) 9 | echo "Exiting keep alive" -------------------------------------------------------------------------------- /models/README.md: -------------------------------------------------------------------------------- 1 | Store trained models here -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | parameterized 2 | selenium -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/__init__.py -------------------------------------------------------------------------------- /src/backend/CombinedResults/calculate_score_results.sh: -------------------------------------------------------------------------------- 1 | ls result*.txt | xargs -I {} sh -c '[ ! -f eval_{} ] && python ../../cococaption/cocoeval.py {} | tail -n 1 > eval_{}' 2 | -------------------------------------------------------------------------------- /src/backend/CombinedResults/calculate_total_score_json.py: -------------------------------------------------------------------------------- 1 | import ast 2 | a = ast.literal_eval(raw_input().strip()) 3 | z=0 4 | for x in a.keys(): 5 | if x[-1] == '3' or x[-1] == '2' or x[-1] == '1': 6 | continue 7 | z+=a[x] 8 | print(z) 9 | -------------------------------------------------------------------------------- /src/backend/CombinedResults/find_total_sentences_unique.py: -------------------------------------------------------------------------------- 1 | import json, sys 2 | z = json.load(open(sys.argv[1])) 3 | sentences = [x['caption'] for x in z['predicted']] 4 | print("%d Unique sentences out of %d"%(len(set(sentences)),len(sentences))) 5 | -------------------------------------------------------------------------------- /src/backend/CombinedResults/summary.sh: -------------------------------------------------------------------------------- 1 | ls result_* | xargs -I {} sh -c 'echo {}; [ -f eval_{} ] && cat eval_{} &&cat eval_{} | python calculate_total_score_json.py;python find_total_sentences_unique.py {};echo ""' 2 | -------------------------------------------------------------------------------- /src/backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/backend/__init__.py -------------------------------------------------------------------------------- /src/backend/data.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import json 3 | import os 4 | import re 5 | 6 | from pytube import YouTube 7 | from subprocess import check_output 8 | 9 | DIR = 'Videos' 10 | CATEGORIES = (1<<3) 11 | 12 | with open('train_2017/videodatainfo_2017.json') as f: 13 | vdi = json.loads(f.read()) 14 | _videos={} 15 | for v in vdi['videos']: 16 | if ((1<<v['category']) & CATEGORIES) > 0: # category bitmask filter; reconstructed from a garbled line, v['category'] is an assumption 17 | _videos[v['video_id']] = { 'url' : v['url'] } 18 | for s in vdi['sentences']: 19 | if s['video_id'] in _videos.keys(): 20 |
_videos[s['video_id']]['caption'] = s['caption'] 21 | 22 | def download_all(): 23 | count = 0 24 | for _id in _videos.keys(): 25 | print("Downloading %s " % _id) 26 | getVideoFname(_id) 27 | count+=1 28 | print("%3.2f %% Completed" % (100.0*count/len(_videos.keys()))) 29 | 30 | def sz_videos(): 31 | return len(_videos) 32 | 33 | def get_videoId(index): 34 | v = _videos.keys()[index] 35 | return v 36 | 37 | def getVideoFname(videoId): 38 | try: 39 | fname = DIR+"/"+videoId+".mp4" 40 | # Caching 41 | if os.path.isfile(fname): 42 | print("Used cached video file %s " % fname) 43 | return fname 44 | url = _videos[videoId]['url'] 45 | print("Fetching info from %s " % url) 46 | yt = YouTube(url) 47 | v = yt.filter('mp4')[0] 48 | # For Non mp4, NOT SUPPORTED for now 49 | # if v is None: 50 | # v = yt.videos()[0] 51 | dfname = DIR+"/"+v.filename+".mp4" 52 | if v: 53 | print("Video Downloading %s " % videoId) 54 | v.download(DIR) 55 | print("Moving %s to %s " % (dfname,fname)) 56 | os.rename(dfname,fname) 57 | print("Video Downloaded") 58 | return fname 59 | else: 60 | print("Video not Found for %s " % videoId) 61 | return None 62 | except Exception as e: 63 | print(str(e)) 64 | return None 65 | 66 | 67 | def getCaption(videoId): 68 | return _videos[videoId]['caption'] 69 | 70 | def getDuration(fname): 71 | return int(float(os.popen("ffprobe -i %s -show_format 2>&1 | grep duration | sed 's/duration=//'" % (fname,)).read())) 72 | 73 | def getFrame(fname,ts): 74 | iname = 'Videos/frames/frame.png' 75 | hr = ts//3600 76 | ts = ts%(3600) 77 | mi = ts//60 78 | ts = ts%60 79 | time = "%02d:%02d:%02d" % (hr,mi,ts) 80 | print("getting frame for time %s " % time) 81 | os.popen("ffmpeg -y -ss %s -i %s -frames 1 %s" % (time,fname,iname)) 82 | img = cv2.imread(iname) 83 | return img 84 | 85 | def getVideo(videoId): 86 | fname = getVideoFname(videoId) 87 | if fname is None: 88 | return None 89 | print("Loading Video %s " % fname) 90 | duration = getDuration(fname) 91 | print("Duration " + str(duration) +" sec") 92 | COUNT = 5 93 | if duration < 15*COUNT: 94 | print("Video too short") 95 | return None 96 | frames = [] 97 | for i in range(COUNT): 98 | image = getFrame(fname,15*(i+1)) 99 | frames.append(image) 100 | return frames 101 | 102 | 103 | -------------------------------------------------------------------------------- /src/backend/framework.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import csv 3 | import json 4 | import numpy as np 5 | import os 6 | import shutil 7 | import sys 8 | 9 | from keras import callbacks 10 | from pprint import pformat 11 | from random import shuffle 12 | 13 | from backend.model import VModel 14 | from backend.vpreprocess import Preprocessor 15 | from common.config import get_app_config, get_vpreprocess_config 16 | from common.logger import logger 17 | from common.status import ModelWeightsStatus 18 | 19 | 20 | WORKERS = 40 21 | DATASET_CACHE = get_app_config()["DATASET_CACHE"] 22 | COCOFNAME = get_vpreprocess_config()["COCOFNAME"] 23 | 24 | CLABEL = 'ResNet_D512L512_G128G64_D1024D0.20BN_BDLSTM1024_D0.2L1024DVS' 25 | 26 | state_uninit = {'epochs':5000, 'start_batch':0, 'batch_size':100, 'saveAtBatch':500, 'steps_per_epoch':500} 27 | 28 | MFNAME = DATASET_CACHE+'/'+CLABEL+'_model.dat' 29 | _MFNAME = DATASET_CACHE+'/'+CLABEL+'_model.dat.bak' 30 | STATE = DATASET_CACHE+'/'+CLABEL+'_state.txt' 31 | RESULTS = DATASET_CACHE+'/'+CLABEL+'_results.txt' 32 | FRESTART = DATASET_CACHE+'/restart' 33 | PREDICT_BATCHSIZE = 200 34 | 35 |
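# Descriptive notes on the constants above: MFNAME is the live weights
# checkpoint and _MFNAME the temporary '.bak' file that save() writes before
# copying it into place; STATE holds the JSON-serialized training counters
# (seeded from state_uninit); RESULTS and FRESTART appear to be an auxiliary
# results file and a restart marker, judging by their names.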
class TrainingLogs: 36 | def __init__(self, prefix=""): 37 | self.epochLogHistory = [] 38 | self.fname = DATASET_CACHE+'/'+CLABEL + "_logs_" + prefix + ".txt" 39 | 40 | def flush(self): 41 | if not os.path.exists(self.fname): 42 | with open(self.fname, "w") as f: 43 | wr = csv.writer(f) 44 | if len(self.epochLogHistory) > 0: 45 | with open(self.fname, "a") as f: 46 | wr = csv.writer(f) 47 | for h in self.epochLogHistory: 48 | wr.writerow(h) 49 | self.epochLogHistory = [] 50 | logger.debug("Training Logs flushed") 51 | 52 | def add(self,cont): 53 | MXCol = 15 54 | dat = [-1] * 15 55 | for i in range(min(MXCol,len(cont))): 56 | dat[i]=cont[i] 57 | self.epochLogHistory.append(dat) 58 | 59 | class ModelGeneratorCallback(callbacks.Callback): 60 | 61 | def __init__(self, state, tlogs, elogs, framework): 62 | self.state = state 63 | self.lastloss = float('inf') 64 | self.tlogs = tlogs 65 | self.elogs = elogs 66 | self.last_epochmodel = None 67 | self.framework = framework 68 | self.batchTrainedCounter = 0 69 | self.bestlossepoch = float('inf') 70 | 71 | def on_epoch_end(self, epoch, logs={}): 72 | logger.debug("Epoch %d End " % epoch) 73 | self.state['epochs']-=1 74 | loss = logs['loss'] 75 | acc = logs['acc'] 76 | valloss = logs['val_loss'] 77 | valacc = logs['val_acc'] 78 | # Sample Content 79 | # {'CIDEr': 0.11325126353463148, 'Bleu_4': 0.1706107390467726, 'Bleu_3': 0.27462591349020055, 'Bleu_2': 0.4157995334621001, 'Bleu_1': 0.6064295446876932, 'ROUGE_L': 0.40471970665189977, 'METEOR': 0.17162570735633326} 80 | coco_json = self.framework.eval_onvalidation() 81 | cider = coco_json['CIDEr'] 82 | bleu4 = coco_json['Bleu_4'] 83 | rouge = coco_json['ROUGE_L'] 84 | meteor = coco_json['METEOR'] 85 | ename = "%.3f_Cider%.3f_Blue%.3f_Rouge%.3f_Meteor%.3f" % (valloss, cider, bleu4, rouge, meteor) 86 | self.elogs.add([epoch,loss, acc, valloss, valacc, cider, bleu4, rouge, meteor]) 87 | self.elogs.flush() 88 | if valloss < self.bestlossepoch or True: 89 | to_rm = self.last_epochmodel 90 | self.last_epochmodel = self.framework.save(epoch=("%03d_loss_%s" % (self.state['epochs'],ename))) 91 | self.bestlossepoch = valloss 92 | if to_rm is not None: 93 | pass 94 | # os.remove(to_rm) 95 | return 96 | 97 | def on_batch_end(self, batch, logs={}): 98 | logger.debug("Batch %d ends" % batch) 99 | valloss = -1 100 | valacc = -1 101 | loss = logs['loss'] 102 | acc = logs['acc'] 103 | self.lastloss = loss 104 | print("Keys Logger %s " % str(logs.keys())) 105 | self.tlogs.add([batch, loss, acc, valloss, valacc]) 106 | self.state['start_batch'] += 1 107 | self.batchTrainedCounter += 1 108 | logger.debug("Batches Trained : %d" % self.batchTrainedCounter) 109 | if self.batchTrainedCounter % self.state['saveAtBatch'] == 0: 110 | logger.debug("Preparing To Save") 111 | self.framework.save() 112 | self.tlogs.flush() 113 | 114 | 115 | class Framework(): 116 | 117 | def __init__(self, model_load = MFNAME, train_mode = False): 118 | self.mode_learning = train_mode 119 | self.state = state_uninit 120 | self.file_model = model_load 121 | self.status_model_weights = ModelWeightsStatus.NO_INFO 122 | self.tlogs = TrainingLogs() 123 | self.elogs = TrainingLogs(prefix = "epoch_") 124 | self.model = None # Init in self.build_model() 125 | self.preprocess = Preprocessor() 126 | self.build_model() 127 | self.load() 128 | logger.debug("__init__ framework complete") 129 | 130 | def build_model(self): 131 | vocab = self.preprocess.vocab 132 | self.vmodel = VModel(vocab.CAPTION_LEN, vocab.VOCAB_SIZE, learning = self.mode_learning) 133 | 
self.model = self.vmodel.get_model() 134 | assert self.preprocess is not None 135 | self.preprocess.set_vmodel(self.vmodel) 136 | 137 | def load(self): 138 | logger.debug("Model Path: %s" % self.file_model) 139 | if os.path.exists(self.file_model): 140 | self.model.load_weights(self.file_model) 141 | self.status_model_weights = ModelWeightsStatus.SUCCESS 142 | logger.debug("Weights Loaded") 143 | else: 144 | self.status_model_weights = ModelWeightsStatus.MODEL_NOT_FOUND 145 | logger.warning("Weights file not found.") 146 | if os.path.exists(STATE): 147 | with open(STATE) as f: 148 | self.state = json.load(f) 149 | logger.debug("State Loaded") 150 | 151 | def get_weights_status(self): 152 | return str(self.status_model_weights) 153 | 154 | def save(self, epoch='xx'): 155 | try: 156 | pass 157 | finally: 158 | tname = _MFNAME 159 | self.model.save_weights(tname) 160 | fname = self.file_model 161 | if epoch != 'xx': 162 | fname = self.file_model + '_' + epoch 163 | shutil.copy2(tname,fname) 164 | os.remove(tname) 165 | logger.debug("Weights Saved") 166 | with open(STATE,'w') as f: 167 | json.dump(self.state,f) 168 | logger.debug("State Saved") 169 | return fname 170 | return None 171 | 172 | def train_generator(self): 173 | epochs = self.state['epochs'] 174 | bs = self.state['batch_size'] 175 | steps_per_epoch = self.state['steps_per_epoch'] 176 | validation_steps = 1 177 | logger.debug("Epochs Left : %d " % epochs) 178 | logger.debug("Batch Size : %d " % bs) 179 | 180 | train_dg = self.preprocess.data_generator(bs, start=self.state['start_batch'], typeSet = 0) 181 | val_dg = self.preprocess.data_generator(bs, -1, typeSet = 1) 182 | logger.debug("Attempting to fit") 183 | callbacklist = [ModelGeneratorCallback(self.state, self.tlogs, self.elogs, self)] 184 | self.vmodel.train_mode() 185 | self.model.fit_generator(train_dg, steps_per_epoch=steps_per_epoch, epochs=epochs, 186 | verbose=1,validation_data=val_dg, validation_steps=validation_steps, 187 | initial_epoch=0, callbacks=callbacklist, 188 | workers=WORKERS, use_multiprocessing=True) 189 | 190 | def predict_model_direct(self, fnames, cache_ids = None): 191 | videoVecs = [] 192 | audioVecs = [] 193 | for i in range(len(fnames)): 194 | cid = None 195 | if cache_ids is not None: 196 | cid = cache_ids[i] 197 | vid_audio = self.preprocess.get_video_content(fnames[i], cache_id = cid) 198 | if vid_audio is None: 199 | return None,{'error':'Video %d couldn\'t be loaded. 
%s ' % (i, fnames[i])} 200 | videoVecs.append(vid_audio[0]) # Video Features 201 | audioVecs.append(vid_audio[1]) # Audio Features 202 | videoVecs = np.array(videoVecs) 203 | audioVecs = np.array(audioVecs) 204 | 205 | # videoVecs =np.array([self.preprocess.get_video_content(f) for f in fnames]) 206 | count = len(fnames) 207 | logger.debug("Predicting for Videos :- \n\t%s " % fnames) 208 | l = 0 209 | vocab = self.preprocess.vocab 210 | startCapRow = [vocab.wordEmbedding[vocab.specialWords['START']] ] 211 | startCapRow.extend([ vocab.wordEmbedding[vocab.specialWords['NONE']] ] * vocab.CAPTION_LEN) 212 | 213 | embeddedCap = np.array([ startCapRow ] * count) 214 | logger.debug("Shape of Caption : %s", str(np.shape(embeddedCap))) 215 | stringCaption = [] 216 | for i in range(count): 217 | stringCaption.append([]) 218 | while l < vocab.CAPTION_LEN: 219 | newOneHotCap = self.model.predict([embeddedCap, audioVecs, videoVecs]) 220 | print("Shape of out Predict Model : %s " % str(np.shape(newOneHotCap))) 221 | for i,newOneHotWord in enumerate(newOneHotCap): 222 | nword = vocab.word_fromonehot(newOneHotWord[l]) 223 | # print(str(i)+" "+str(l)+" "+nword) 224 | stringCaption[i].append( nword ) 225 | if l + 1 != vocab.CAPTION_LEN: 226 | embeddedCap[i][l+1] = vocab.wordEmbedding[nword] 227 | 228 | print([' '.join(cap) for cap in stringCaption]) 229 | l += 1 230 | logger.debug("Prediction Complete") 231 | captionObject = [] 232 | for i,cap in enumerate(stringCaption): 233 | captionObject.append({'fname':fnames[i], 'caption':cap}) 234 | return stringCaption, captionObject 235 | 236 | def predict_ids(self, _ids): 237 | logger.debug("Trying to predict for %s" % (_ids,)) 238 | result = self.predict_model(_ids = _ids) 239 | return result 240 | 241 | def predict_fnames(self, fnames): 242 | logger.debug("Trying to predict for %s" % (fnames,)) 243 | result = self.predict_model(fnames = fnames) 244 | return result 245 | 246 | def predict_model(self, _ids = None, fnames = None): 247 | assert (_ids is None) ^ (fnames is None) 248 | vHandler = self.preprocess.vHandler 249 | if fnames is None: 250 | fnames = [] 251 | for _id in _ids: 252 | logger.debug("Obtaining fname for %d" % _id) 253 | fname = vHandler.downloadVideo(_id) 254 | if fname is None: 255 | logger.info("Ignoring %d video " % _id) 256 | else: 257 | fnames.append(fname) 258 | 259 | batch_size = PREDICT_BATCHSIZE 260 | batch_count = (len(fnames)+batch_size-1)//batch_size 261 | predictions,output = ([],[]) 262 | for i in range(batch_count): 263 | cids = None 264 | if _ids is not None: 265 | cids = _ids[i*batch_size:(i+1)*batch_size] 266 | pred,out = self.predict_model_direct(fnames[i*batch_size:(i+1)*batch_size], cache_ids = cids) 267 | if pred is None: 268 | logger.debug(json.dumps(out)) 269 | assert False 270 | predictions.extend(pred) 271 | output.extend(out) 272 | results = [] 273 | for i in range(len(fnames)): 274 | logger.debug("For eog %s" % fnames[i]) 275 | predictedCaption = ' '.join(predictions[i]) 276 | logger.debug("Predicted Caption : %s" % predictedCaption ) 277 | actualCaption = None 278 | if _ids is not None: 279 | actualCaption = vHandler.getCaptionData()[_ids[i]] 280 | logger.debug("Actual Captions - \n%s" % pformat(actualCaption) ) 281 | res = dict() 282 | res['fname'] = fnames[i] 283 | res['output'] = predictedCaption 284 | res['actual'] = actualCaption 285 | results.append(res) 286 | return json.dumps(results, indent=4, sort_keys=True) 287 | 288 | def isVideoExtension(self, fname): 289 | for ext in ['mp4','jpeg','png']: 290 | if 
fname.endswith('.'+ext): 291 | return True 292 | return False 293 | 294 | def predict_test(self, dirpath, mxc): 295 | videos = ["%s/%s" % (dirpath,vid) for vid in os.listdir(dirpath) if self.isVideoExtension(vid)][0:mxc] 296 | self.predict_model(fnames = videos) 297 | 298 | def clean_caption(self, msg): 299 | if '<' in msg: 300 | return msg.split("<")[0] 301 | return msg 302 | 303 | def save_all(self, _ids, save = RESULTS): 304 | _result = json.loads(self.predict_ids(_ids)) 305 | test_predicted = [] 306 | test_actual = [] 307 | for res in _result: 308 | tp = dict() 309 | _id = int(res['fname'].split('/')[-1].split('.')[0]) 310 | tp['video_id'] = _id 311 | tp['caption'] = self.clean_caption(res['output']) 312 | test_predicted.append(tp) 313 | 314 | for cap in res['actual']: 315 | tp_actual = dict() 316 | tp_actual['video_id'] = _id 317 | tp_actual['caption'] = cap 318 | test_actual.append(tp_actual) 319 | result = dict() 320 | result['predicted'] = test_predicted 321 | result['actual'] = test_actual 322 | with open(save, 'w') as f: 323 | f.write(json.dumps(result)) 324 | logger.debug("Result Saved") 325 | 326 | def eval_onvalidation(self): 327 | fname = '/tmp/save_model_' + CLABEL 328 | logger.debug("Calculating cocoscore") 329 | valids = self.preprocess.vHandler.getValidationIds() 330 | self.save_all(valids, save = fname) 331 | cmd = "python %s %s | tail -n 1" % (COCOFNAME, fname) 332 | coco = ast.literal_eval(os.popen(cmd).read().strip()) 333 | logger.debug("Done") 334 | logger.debug("Coco Scores :%s\n" % json.dumps(coco,indent=4, sort_keys=True)) 335 | return coco 336 | 337 | def get_testids(self, count = -1): 338 | ids = self.preprocess.vHandler.getTestIds() 339 | if count == -1: 340 | count = len(ids) 341 | else: 342 | shuffle(ids) 343 | return ids[:count] 344 | 345 | def get_valids(self, count = -1): 346 | ids = self.preprocess.vHandler.getValidationIds() 347 | if count == -1: 348 | count = len(ids) 349 | else: 350 | shuffle(ids) 351 | return ids[:count] 352 | 353 | def get_trainids(self, count = -1): 354 | ids = self.preprocess.vHandler.getTrainingIds() 355 | if count == -1: 356 | count = len(ids) 357 | else: 358 | shuffle(ids) 359 | return ids[:count] 360 | -------------------------------------------------------------------------------- /src/backend/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import sys 4 | 5 | from keras.applications import ResNet50, VGG16 6 | from keras.applications.inception_v3 import InceptionV3 7 | from keras.applications.resnet50 import preprocess_input 8 | from keras.layers import Dropout, Merge, Flatten, RepeatVector, Activation 9 | from keras.layers import Embedding, Conv2D, MaxPooling2D, LSTM, GRU, BatchNormalization 10 | from keras.layers import TimeDistributed, Dense, Input, Flatten, GlobalAveragePooling2D, Bidirectional 11 | from keras.models import Model 12 | from keras.models import Sequential 13 | from keras.optimizers import RMSprop 14 | from keras.preprocessing import image 15 | from keras.regularizers import l2 16 | import keras.backend as K 17 | import tensorflow as tf 18 | 19 | from backend.vocab import Vocab 20 | from common.logger import logger 21 | 22 | def sentence_distance(y_true, y_pred): 23 | return K.sqrt(K.sum(K.square(K.abs(y_true-y_pred)),axis=1,keepdims=True)) 24 | 25 | class VModel: 26 | 27 | def __init__(self, CAPTION_LEN, VOCAB_SIZE, cutoffonly = False, learning = True): 28 | self.CAPTION_LEN = CAPTION_LEN 29 | self.VOCAB_SIZE = VOCAB_SIZE 30 | 
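# With cutoffonly=True only the frozen feature-extractor CNN below is
# built; the full captioning network (build_mcnn) is skipped.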
if not cutoffonly: 31 | self.build_mcnn(self.CAPTION_LEN, self.VOCAB_SIZE, learning = learning) 32 | self.build_cutoffmodel() 33 | 34 | def get_model(self): 35 | return self.model 36 | 37 | ''' 38 | Attempt to split pretrained CNN out of model 39 | To cache a lower dimension vector per frame to file 40 | # PC : pretrained CNN will be non-trainable now 41 | ''' 42 | def build_cutoffmodel(self): 43 | base = ResNet50(include_top = False, weights='imagenet') 44 | # base = InceptionV3(include_top = False, weights='imagenet') 45 | self.co_model = base 46 | logger.debug("Building Cutoff Model") 47 | self.co_model.summary() 48 | self.co_model._make_predict_function() 49 | self.graph = tf.get_default_graph() 50 | logger.debug("Building Cutoff Model : Completed") 51 | return self.co_model 52 | 53 | # co == Cutoff Model 54 | def co_getoutshape(self, assert_model = None): 55 | # ResNet 56 | shape = (None,2048) 57 | ## Inception V3 58 | # shape = (None, 8*8*2048) 59 | logger.debug("Model Cutoff OutShape : %s" % str(shape)) 60 | ''' 61 | # Not in use 62 | if assert_model is not None: 63 | ashape = assert_model.output_shape 64 | sz = 1 65 | for x in ashape: 66 | if x is not None: 67 | sz = sz * x 68 | ashape = (None, sz) 69 | logger.debug("Assert Model Cutoff OutShape : %s" % str(ashape)) 70 | assert shape == ashape 71 | ''' 72 | assert len(shape) == 2 73 | assert shape[0] is None 74 | return shape 75 | 76 | def preprocess_partialmodel(self, frames): 77 | frames_in = np.asarray([image.img_to_array(frame) for frame in frames]) 78 | frames_in = preprocess_input(frames_in) 79 | with self.graph.as_default(): 80 | frames_out = self.co_model.predict(frames_in) 81 | frames_out = np.array([frame.flatten() for frame in frames_out]) 82 | return frames_out 83 | 84 | def train_mode(self): 85 | K.set_learning_phase(1) 86 | 87 | def build_mcnn(self, CAPTION_LEN, VOCAB_SIZE, learning = True): 88 | if learning: 89 | self.train_mode() 90 | from backend.videohandler import VideoHandler 91 | logger.debug("Creating Model (CNN Cutoff) with Vocab Size : %d " % VOCAB_SIZE) 92 | cmodel = Sequential() 93 | cmodel.add(TimeDistributed(Dense(512,kernel_initializer='random_normal'), input_shape=(CAPTION_LEN+1,Vocab.OUTDIM_EMB ))) 94 | cmodel.add(LSTM(512, return_sequences=True,kernel_initializer='random_normal')) 95 | cmodel.summary() 96 | 97 | input_shape_audio = VideoHandler.AUDIO_FEATURE 98 | amodel = Sequential() 99 | amodel.add(GRU(128, 100 | dropout=0.2, 101 | recurrent_dropout=0.2, 102 | return_sequences=True, 103 | input_shape=input_shape_audio)) 104 | amodel.add(BatchNormalization()) 105 | amodel.add(GRU(64, 106 | dropout=0.2, 107 | recurrent_dropout=0.2, 108 | return_sequences=True)) 109 | amodel.add(BatchNormalization()) 110 | amodel.add(Flatten()) 111 | amodel.add(RepeatVector(CAPTION_LEN + 1)) 112 | amodel.summary() 113 | 114 | input_shape_vid = self.co_getoutshape() 115 | imodel = Sequential() 116 | imodel.add(TimeDistributed(Dense(1024,kernel_initializer='random_normal'), input_shape=input_shape_vid)) 117 | imodel.add(TimeDistributed(Dropout(0.20))) 118 | imodel.add(TimeDistributed(BatchNormalization(axis=-1))) 119 | imodel.add(Activation('tanh')) 120 | imodel.add(Bidirectional(GRU(1024, return_sequences=False, kernel_initializer='random_normal'))) 121 | imodel.add(RepeatVector(CAPTION_LEN + 1)) 122 | 123 | imodel.summary() 124 | 125 | model = Sequential() 126 | model.add(Merge([cmodel,amodel,imodel],mode='concat')) 127 | model.add(TimeDistributed(Dropout(0.2))) 128 | model.add(LSTM(1024,return_sequences=True, 
kernel_initializer='random_normal',recurrent_regularizer=l2(0.01))) 129 | model.add(TimeDistributed(Dense(VOCAB_SIZE,kernel_initializer='random_normal'))) 130 | model.add(Activation('softmax')) 131 | optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, decay=0) 132 | model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) 133 | model.summary() 134 | logger.debug("Model Created ResNet_D512L512_G128G64_D1024D0.25BN_BDGRU1024_D0.2L1024DVS") 135 | self.model = model 136 | return model 137 | 138 | def plot_model(self, filename): 139 | from keras.utils import plot_model 140 | plot_model(self.model, to_file=filename, show_shapes = True, show_layer_names = False) 141 | print("Model Plotted in %s"%filename) 142 | 143 | if __name__ == "__main__": 144 | if sys.argv[1] == "plot_model": 145 | from vocab import Vocab 146 | vmodel = VModel(Vocab.CAPTION_LEN, Vocab.VOCAB_SIZE) 147 | vmodel.plot_model(sys.argv[2]) 148 | -------------------------------------------------------------------------------- /src/backend/parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | from common.logger import logger 5 | from common.rpc import register_server, get_rpc, PORT 6 | 7 | class Parser: 8 | def __init__(self): 9 | pass 10 | 11 | def init_framework(self, model_fname = None, train_mode = False): 12 | if not hasattr(self,'framework'): 13 | from backend.framework import Framework 14 | if model_fname is not None: 15 | self.framework = Framework(model_load = model_fname, train_mode = train_mode) 16 | else: 17 | self.framework = Framework(train_mode = train_mode) 18 | 19 | def parse(self): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('command', choices=['train','predict','server','predict_all_model']) 22 | args = parser.parse_args(sys.argv[1:2]) 23 | if args.command == 'train': 24 | self.train() 25 | if args.command == 'predict': 26 | self.predict() 27 | if args.command == 'server': 28 | self.server() 29 | if args.command == 'predict_all_model': 30 | self.predict_all_model() 31 | print(args.command) 32 | 33 | def train(self): 34 | logger.debug("Training Mode") 35 | self.init_framework(train_mode = True) 36 | self.framework.train_generator() 37 | 38 | def predict_all_model(self): 39 | import glob, os 40 | from backend.framework import Framework, MFNAME 41 | 42 | logger.debug("PredictAllModel Mode") 43 | result_dir = 'CombinedResults' 44 | os.system('mkdir -p %s' % result_dir) 45 | for fname in glob.glob(MFNAME+"_*"): 46 | save_file = result_dir + "/result_"+os.path.basename(fname)+"_.txt" 47 | if os.path.exists(save_file): 48 | continue 49 | logger.debug("Working on model %s " % fname) 50 | self.framework = Framework(model_load = fname) 51 | self.framework.save_all(_ids = self.framework.get_testids(), save = save_file) 52 | logger.debug("Done") 53 | 54 | def predict(self): 55 | parser = argparse.ArgumentParser(prog = sys.argv[0]+" predict", description = 'Prediction Mode') 56 | parser.add_argument('dataset', choices=['train','test','save_all_test'], help='Video dataset for prediction') 57 | parser.add_argument('-c', '--count', type = int, default = 10) 58 | args = parser.parse_args(sys.argv[2:]) 59 | 60 | logger.debug("Prediction Mode") 61 | self.init_framework() 62 | if args.dataset == 'train': 63 | _ids = self.framework.get_trainids(args.count) 64 | elif args.dataset == 'test': 65 | _ids = self.framework.get_testids(args.count) 66 | elif args.dataset == 'save_all_test': 67 | 
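# save_all_test ignores --count: it predicts for every downloaded test id
# and writes the results to the RESULTS file (see Framework.save_all).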
self.framework.save_all(_ids = self.framework.get_testids()) 68 | return 69 | else: 70 | assert False 71 | self.framework.predict_model(_ids = _ids) 72 | 73 | def server(self): 74 | logger.debug("Server Mode") 75 | parser = argparse.ArgumentParser(prog = sys.argv[0]+" server", description = 'Server Mode') 76 | parser.add_argument('-i', '--init-only', help='Prepares early caches for faster execution', action='store_true') 77 | parser.add_argument('-s', '--start', help='Start RPC Server', action='store_true') 78 | parser.add_argument('-m', '--model', help='Model file') 79 | parser.add_argument('-pids', '--predict_ids',type=int, help='Obtain Results for given IDs', nargs='+') 80 | parser.add_argument('-pfs', '--predict_fnames', help='Obtain Results for given files', nargs='+') 81 | parser.add_argument('-cf', '--close_framework', help='Close Server Framework', action='store_true') 82 | args = parser.parse_args(sys.argv[2:]) 83 | if args.init_only: 84 | self.init_framework() 85 | print("[RPC][Server][Init][Done]") 86 | elif args.start: 87 | model_fname = None 88 | if args.model: 89 | model_fname = args.model 90 | self.init_framework(model_fname) 91 | register_server(self.framework) 92 | elif args.predict_ids: 93 | proxy = get_rpc() 94 | result = proxy.predict_ids( args.predict_ids ) 95 | print(result) 96 | elif args.predict_fnames: 97 | proxy = get_rpc() 98 | result = proxy.predict_fnames( args.predict_fnames ) 99 | print(result) 100 | elif args.close_framework: 101 | proxy = get_rpc() 102 | proxy.close_framework() 103 | print("[RPC][Send][close_framework]") 104 | else: 105 | parser.print_help() 106 | 107 | if __name__ == "__main__": 108 | Parser().parse() 109 | -------------------------------------------------------------------------------- /src/backend/plotepochlog.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import sys 4 | 5 | MXPOINT = 100 6 | assert len(sys.argv)>=2 7 | fname=sys.argv[1] 8 | showtrain = True 9 | if len(sys.argv)>=3: 10 | showtrain = (sys.argv[2] == 'yo') 11 | showval = True 12 | if len(sys.argv)>=4: 13 | showval = (sys.argv[3] == 'yo') 14 | showepoch = True 15 | if len(sys.argv)>=5: 16 | showepoch = (sys.argv[4] == 'yo') 17 | 18 | print("Fname %s " % fname) 19 | 20 | batch = [] 21 | loss = [] 22 | acc = [] 23 | val_loss = [] 24 | val_acc = [] 25 | 26 | ndata = [] 27 | with open(fname,'r') as f: 28 | for row in f: 29 | rr =[float(x) for x in row.split(',')] 30 | ndata.append(rr) 31 | 32 | ndata = np.array(ndata, dtype='float') 33 | print(np.shape(ndata)) 34 | step = 1 35 | if len(ndata[0]) > MXPOINT: 36 | step = len(ndata[0]) // MXPOINT 37 | [batch, loss, acc, val_loss, val_acc,cider,bleu4,rouge,meteor] = [y[::step] for y in np.matrix.transpose(ndata)][:9] 38 | 39 | x = range(len(batch)) 40 | fig = plt.figure() 41 | host = fig.add_subplot(111) 42 | pscores = host.twinx() 43 | pacc = host.twinx() 44 | ploss = host.twinx() 45 | 46 | if showepoch: 47 | _b,=host.plot(x,batch,color= plt.cm.viridis(0.95),label='Batches') 48 | 49 | if showtrain: 50 | _a,=pacc.plot(x,acc,'-.',label="Accuracy",color= plt.cm.viridis(0)) 51 | _l,=ploss.plot(x,loss, '-', label="Loss", color = plt.cm.viridis(0)) 52 | if showval: 53 | ploss.plot(x,val_loss,'-', label="Val Loss",color= plt.cm.viridis(0.5)) 54 | pacc.plot(x,val_acc,'-.',label="Val Accuracy",color= plt.cm.viridis(0.5)) 55 | if showtrain or showval: 56 | ploss.legend(loc='lower right') 57 | pacc.legend(loc='lower left') 58 | 
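# Four y-axes share one x-axis: host tracks the batch counter, ploss and
# pacc carry the loss/accuracy curves, and pscores the COCO metrics below.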
ploss.spines['right'].set_position(('outward', 30)) 59 | 60 | score_total = cider+bleu4+rouge+meteor 61 | pscores.plot(x,cider,'-', label="Cider",color= plt.cm.viridis(0.0)) 62 | pscores.plot(x,bleu4,'-', label="Bleu4",color= plt.cm.viridis(0.2)) 63 | pscores.plot(x,rouge,'-', label="Rouge",color= plt.cm.viridis(0.4)) 64 | pscores.plot(x,meteor,'-', label="Meteor",color= plt.cm.viridis(0.6)) 65 | pscores.plot(x,score_total,'-', label="Total",color= plt.cm.viridis(0.8)) 66 | pscores.legend(loc='upper left') 67 | 68 | 69 | #host.yaxis.label.set_color(_b.get_color()) 70 | #ploss.yaxis.label.set_color(_l.get_color()) 71 | #pacc.yaxis.label.set_color(_a.get_color()) 72 | 73 | #plt.savefig("plot.png", bbox_inches='tight') 74 | 75 | best_iter = np.argmax(score_total) 76 | print("Best Iteration %d " % best_iter) 77 | print("\tCIDER %.4f " % cider[best_iter]) 78 | print("\tBLEU4 %.4f " % bleu4[best_iter]) 79 | print("\tROUGE %.4f " % rouge[best_iter]) 80 | print("\tMETEOR %.4f " % meteor[best_iter]) 81 | print("\tTotalScore %.4f " % score_total[best_iter]) 82 | 83 | 84 | 85 | plt.show() 86 | -------------------------------------------------------------------------------- /src/backend/pred.sh: -------------------------------------------------------------------------------- 1 | python framework.py -predict 882,527155,480723,267111,393362 2 | -------------------------------------------------------------------------------- /src/backend/preprocess.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import json 3 | import os 4 | import numpy as np 5 | import pickle 6 | import random 7 | import re 8 | import shutil 9 | import sys 10 | 11 | from sets import Set 12 | 13 | from keras import callbacks 14 | from keras.applications import imagenet_utils 15 | from keras.preprocessing import image 16 | from keras.preprocessing import sequence 17 | 18 | from common.logger import logger 19 | 20 | ROOT_DIR = '/home/gagan.cs14/btp' 21 | GITBRANCH = os.popen('git branch | grep "*"').read().split(" ")[1][:-1] 22 | GITBRANCHPREFIX = "/home/gagan.cs14/btp_"+GITBRANCH+"/" 23 | # Parameters 24 | CAPTION_LEN = 10 25 | MAX_WORDS = 400000 26 | OUTENCODINGGLOVE = False 27 | 28 | os.chdir(ROOT_DIR) 29 | BADLOGS = GITBRANCHPREFIX+"badlogs.txt" 30 | FILENAME_CAPTION = 'ImageDataset/annotations/captions_train2014.json' 31 | DIR_IMAGES = 'ImageDataset/train2014/' 32 | DIR_IMAGESP = 'ImageDataset/processed/' 33 | VOCAB_FILE = GITBRANCHPREFIX+"vocab.dat" 34 | GLOVE_FILE = 'glove/glove.6B.100d.txt' 35 | OUTDIM_EMB = 100 36 | USE_GLOVE = True 37 | WORD_MIN_FREQ = 5 38 | def get_image_fname(_id): 39 | return '%sCOCO_train2014_%012d.jpg' % (DIR_IMAGES, _id) 40 | 41 | 42 | vocab = Set([]) 43 | v_ind2word = {} 44 | v_word2ind = {} 45 | VOCAB_SIZE = [0] 46 | 47 | embeddingLen = None 48 | 49 | #embeddingMatrix = np.zeros((MAX_WORDS, 100)) 50 | #EMBEDDING_FILE = 'embedding' 51 | #embeddingMatrixRef = [ embeddingMatrix ] 52 | #################################################ADD GIT BRANCH 53 | ICAPPF = GITBRANCHPREFIX+'imcap.dat' 54 | embeddingIndex = {} 55 | EMBEDDINGI_FILE = GITBRANCHPREFIX+'embeddingIScaled5' 56 | EMBEDDING_OUT_SCALEFACT = 5 #(-4.0665998, 3.575) needs to be mapped to -1 to +1 57 | embeddingIndexRef = [ embeddingIndex ] 58 | 59 | 60 | def createDirs(): 61 | try: 62 | os.makedirs(GITBRANCHPREFIX) 63 | os.makedirs(ROOT_DIR + '/' + DIR_IMAGESP) 64 | except OSError: 65 | if not os.path.isdir(GITBRANCHPREFIX): 66 | raise 67 | 68 | def badLogs(msg): 69 | print(msg) 70 | 
with open(BADLOGS,"a") as f: 71 | f.write(msg) 72 | 73 | ''' 74 | def addToVocab(w): 75 | global VOCAB_SIZE 76 | vocab.add(w) 77 | v_ind2word[ VOCAB_SIZE ] = w 78 | v_word2ind[ w ] = VOCAB_SIZE 79 | if not isEmbeddingPresent: 80 | if w in embeddingIndex.keys(): 81 | embeddingMatrix[VOCAB_SIZE] = embeddingIndex[w] 82 | print(embeddingMatrix[VOCAB_SIZE]) 83 | if VOCAB_SIZE<10: 84 | print("%d : %s" % (VOCAB_SIZE, w)) 85 | VOCAB_SIZE += 1 86 | return VOCAB_SIZE-1 87 | ''' 88 | ''' 89 | Add NULL Word 90 | Add NonVocab Word 91 | ''' 92 | ENG_SOS = ">" 93 | ENG_EOS = "<" 94 | ENG_EXTRA = "___" 95 | ENG_NONE = "?!?" 96 | 97 | 98 | ''' 99 | def iniVocab(): 100 | global W_SOS,W_EOS 101 | #addToVocab("none") 102 | #addToVocab("extra") 103 | #W_SOS = addToVocab(ENG_SOS) 104 | #W_EOS = addToVocab(ENG_EOS) 105 | ''' 106 | 107 | 108 | def build_gloveVocab(): 109 | logger.debug("Started") 110 | if len(embeddingIndexRef[0].keys()) > 0: 111 | logger.debug("Embedding Already Present %d " % len(embeddingIndexRef[0].keys())) 112 | return 113 | isEmbeddingPresent = os.path.exists(EMBEDDINGI_FILE) 114 | print("Embedding Present %s " % isEmbeddingPresent) 115 | if isEmbeddingPresent: 116 | 117 | '''with open(EMBEDDING_FILE,'r') as f: 118 | global embeddingMatrix 119 | embeddingMatrix = pickle.load(f) 120 | embeddingMatrixRef[0] = embeddingMatrix 121 | ''' 122 | minVal = float('inf') 123 | maxVal = -minVal 124 | with open(EMBEDDINGI_FILE,'r') as f: 125 | global embeddingIndex 126 | embeddingIndex = pickle.load(f) 127 | embeddingIndexRef[0] = embeddingIndex 128 | for v in embeddingIndex.values(): 129 | for x in v: 130 | minVal = min(minVal,x) 131 | maxVal = max(maxVal,x) 132 | #print("minVal, maxVal %s " % str((minVal,maxVal))) 133 | #exit() 134 | print("Embedding Loaded") 135 | else: 136 | with open(GLOVE_FILE,'r') as f: 137 | for i,line in enumerate(f): 138 | tokens = line.split() 139 | #print(tokens) 140 | tokens = [tok.__str__() for tok in tokens] 141 | #print(tokens) 142 | #exit() 143 | #if i==200: 144 | # break 145 | 146 | word = tokens[0] 147 | #embeddingLen = len(tokens)-1 148 | if word == "none": 149 | print("YoFound you") 150 | if i<5: 151 | print(word) 152 | #print(tokens[1:]) 153 | embeddingIndex[word] = np.asarray(tokens[1:], dtype='float32') * (1.0/EMBEDDING_OUT_SCALEFACT) 154 | #print(embeddingIndex[word]) 155 | #exit() 156 | #exit() 157 | assert isEmbeddingPresent == False 158 | isEmbeddingPresent = True 159 | #with open(EMBEDDING_FILE,'w') as f: 160 | # pickle.dump(embeddingMatrix,f) 161 | with open(EMBEDDINGI_FILE,'w') as f: 162 | pickle.dump(embeddingIndex,f) 163 | print("Embedding Saved!") 164 | 165 | #iniVocab() 166 | logger.debug("Completed") 167 | 168 | 169 | ''' 170 | def op_on_caption(cap): 171 | for w in cap.split(' '): 172 | w = w.lower() 173 | if w not in vocab: 174 | v_word2ind[ w ] = addToVocab(w) 175 | ''' 176 | def build_image_caption_pair(): 177 | if os.path.exists(ICAPPF): 178 | with open(ICAPPF,'r') as f: 179 | x,mywords = pickle.load(f) 180 | print("Image Caption Pair Data Model Loaded") 181 | return x,mywords 182 | 183 | x = {} 184 | logger.debug("Started") 185 | wordFreq = {} 186 | uwords = set([]) 187 | with open(FILENAME_CAPTION) as f: 188 | captions = json.load(f)['annotations'] 189 | count = 0 190 | for cap in captions: 191 | cap['caption']= re.sub('[^a-zA-Z]+', ' ', cap['caption'].encode('utf-8')).lower() 192 | for w in cap['caption'].split(' '): 193 | if w in uwords: 194 | wordFreq[w]+=1 195 | else: 196 | wordFreq[w] =1 197 | uwords.add(w) 198 | 
#op_on_caption(cap['caption']) 199 | if True or count < 100: 200 | x[cap['image_id']] = cap['caption'] 201 | count+=1 202 | #nmywords = wordFreq.keys() 203 | #sorted(nmywords, key=lambda key: wordFreq[key], reverse=True) 204 | #print(wordFreq.keys()) 205 | mywords = set([w for w in wordFreq.keys() if wordFreq[w]>=WORD_MIN_FREQ]) 206 | mywords.add(ENG_SOS) 207 | mywords.add(ENG_EOS) 208 | mywords.add(ENG_NONE) 209 | mywords.add(ENG_EXTRA) 210 | 211 | #print(mywords) 212 | print(len(mywords)) 213 | #mywords = mywords[:WORD_TOP] 214 | with open(ICAPPF,'w') as f: 215 | pickle.dump([x,mywords],f) 216 | print("Image Caption Pair Data Model Saved") 217 | 218 | 219 | logger.debug("Completed, Vocab Size NONE ")#%len(v_word2ind)) 220 | return (x,mywords) 221 | 222 | #def rgb2gray(rgb): 223 | # return np.dot(rgb[...,:3], [0.299, 0.587, 0.114]) 224 | 225 | def imageToVec(_id): 226 | NEED_W = 224 227 | NEED_H = 224 228 | if type("")==type(_id): 229 | fname = _id 230 | else: 231 | fname = get_image_fname(_id) 232 | #afname = DIR_IMAGESP + fname.split('/')[-1] + '.pickle' 233 | #if os.path.exists(afname): 234 | # with open(afname,'r') as f: 235 | # return pickle.load(f) 236 | #print(fname) 237 | img = image.load_img(fname, target_size=(NEED_H, NEED_W)) 238 | x = image.img_to_array(img) 239 | x /= 255. 240 | x -= 0.5 241 | x *= 2. 242 | x = np.asarray(x) 243 | #with open(afname,'w') as f: 244 | # pickle.dump(x,f) 245 | return x 246 | 247 | ############################################ REMOVE HERE ### 248 | #img.save("temp.jpg") 249 | #img = cv2.imread(fname) 250 | #print(img) 251 | #img = cv2.resize(img, (NEED_H, NEED_W)) 252 | #cv2.imwrite('test.jpg',img) 253 | #img = np.asarray(img) 254 | #print("Shape %s " % (str(np.shape(img)))) 255 | #cv2.imwrite('temp.jpg',img) 256 | #vec = np.asarray(img) 257 | #if not vec.any(): 258 | # badLogs("All zero for %s\n" % str(_id)) 259 | #vec = vec/255.0 260 | #return vec 261 | #bw = rgb2gray(img) 262 | #print("BW Shape %s " % (str(np.shape(bw)))) 263 | 264 | 265 | def getWord2Ind(w): 266 | w=w.lower() 267 | if w not in v_word2ind.keys(): 268 | w=ENG_EXTRA 269 | #print(w) 270 | return v_word2ind[w] 271 | 272 | def word2embd(word): 273 | if word not in embeddingIndexRef[0].keys(): 274 | word = ENG_EXTRA 275 | return embeddingIndexRef[0][word] 276 | 277 | def embdToWord(embd): 278 | bestWord = None 279 | distance = float('inf') 280 | for word in embeddingIndex.keys(): 281 | e=embeddingIndex[word] 282 | d = 0 283 | for a,b in zip(e,embd): 284 | d+=(a-b)*(a-b) 285 | if d0 355 | assert len(v_ind2word) == 0 356 | assert len(v_word2ind) == 0 357 | counter = 0 358 | for w in embeddingIndex.keys(): 359 | if w in topwords: 360 | v_ind2word[counter]=w 361 | v_word2ind[w]=counter 362 | counter += 1 363 | # ENG_* words present in embeddingIndex and topwords 364 | VOCAB_SIZE[0] = counter 365 | #print("Embedding Index Len %d " % len(embeddingIndex.keys())) 366 | #exit() 367 | print("TOPWords %d " % len(topwords)) 368 | print("Embeddding Words %d " % len(embeddingIndex.keys())) 369 | print("Cal Vocab Size %d " % VOCAB_SIZE[0]) 370 | 371 | with open(VOCAB_FILE,'w') as f: 372 | pickle.dump([v_ind2word,v_word2ind, VOCAB_SIZE[0]],f) 373 | print("Vocab Model Saved") 374 | 375 | assert ENG_SOS in v_word2ind.keys() 376 | assert ENG_EOS in v_word2ind.keys() 377 | assert ENG_NONE in v_word2ind.keys() 378 | assert ENG_EXTRA in v_word2ind.keys() 379 | print("Vocabulary Size %d for %d captions" % (VOCAB_SIZE[0], len(lst))) 380 | return lst 381 | 382 | def feed_image_caption(_id,lst): 383 | 
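# Returns (decoder input, image vector, decoder target): the input caption
# is GloVe-encoded and prefixed with SOS, the target is one-hot encoded and
# suffixed with EOS, i.e. the usual one-step-shifted teacher-forcing pair.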
img,capGl,capOH = get_image_caption(_id,lst) 384 | # Glove 385 | we_sos = [word2embd(ENG_SOS)] 386 | we_eos = [word2embd(ENG_EOS)] 387 | # One Hot 388 | we_eosOH = [wordToEncode(ENG_EOS,encodeType="onehot")] 389 | return ( (we_sos+list(capGl)), img, (list(capOH) + we_eosOH)) 390 | 391 | def datas_from_ids(idlst,lst): 392 | images = [] 393 | capS = [] 394 | capE = [] 395 | for _id in idlst: 396 | _capS,_img,_capE = feed_image_caption(_id,lst) 397 | images.append(_img) 398 | capS.append(_capS) 399 | capE.append(_capE) 400 | return [[np.asarray(capS),np.asarray(images)],np.asarray(capE)] 401 | 402 | # Train for batch order 0,0,1,0,1,2,0,1,2,3,4,0,1,2,3,4,5.. 403 | def data_generator(lst, batch_size, start=0, isTrainSet = True): 404 | count = (len(lst.keys()))//batch_size 405 | #print("Max Unique Batches %d " % count) 406 | countValidation = 5#100 407 | countTrain = count - 100 408 | print("Validation Data : %d , Train Batches %d, BatchSize %d\tBatchOffset : %d" % (countValidation, countTrain, batch_size, start)) 409 | offset = 0 410 | left = countTrain 411 | extra = 0 412 | #start = 0 413 | if not isTrainSet: 414 | # Validation Data 415 | left = countValidation 416 | offset = countTrain * batch_size 417 | idlst = lst.keys()[offset:offset+left] 418 | yield datas_from_ids(idlst,lst) 419 | return 420 | # Training Data 421 | maxSequenceLength = countTrain*(countTrain+1)//2 422 | cbatch = 1 423 | batchId = 1 424 | iterBatch = 0 425 | 426 | for it in range(maxSequenceLength): 427 | if batchId == cbatch: 428 | batchId = 1 429 | cbatch *= 2 430 | if cbatch > countTrain: 431 | cbatch = countTrain 432 | else: 433 | batchId += 1 434 | 435 | iterBatch+=1 436 | if iterBatch<=start: 437 | continue 438 | idlst = lst.keys()[(batchId-1)*batch_size:(batchId)*batch_size] 439 | print("Batch Id %d Loaded" % (batchId-1)) 440 | yield datas_from_ids(idlst,lst) 441 | return 442 | 443 | def build_dataset(lst, batch_size = -1, val_size = 0,outerepoch=random.randint(0,10000)): 444 | logger.debug("Started") 445 | 446 | #_id = lst.keys()[0] 447 | #imageToVec(_id) 448 | #capVec = captionToVec(lst[_id]) 449 | #print(capVec) 450 | #print("Shape of CapVec %s " % str(np.shape(capVec))) 451 | train_set = [] 452 | val_set = [] 453 | if batch_size == -1: 454 | for i,_id in enumerate(lst.keys()): 455 | if i > 100: 456 | break 457 | train_set.append( get_image_caption(_id,lst)) 458 | else: 459 | tsize = batch_size 460 | count = (len(lst.keys())-val_size)//tsize 461 | print("Max Unique Outer Batches %d " % count) 462 | outerepoch = outerepoch%count 463 | oinds = outerepoch*tsize 464 | einds = (outerepoch+1)*tsize 465 | mylst = lst.keys()[oinds:einds] 466 | mylst.extend(lst.keys()[-val_size-1:]) 467 | mx = len(mylst) 468 | #mx = 1000 #########HERE########### 469 | splitKey = tsize #int(mx*0.9) 470 | 471 | print("Max Keys %d\tSplit keys %d" % (mx, splitKey)) 472 | todolist = [("Train set",train_set, batch_size,0,splitKey),("Validation Set",val_set, val_size,splitKey,mx-splitKey)] 473 | for (s,cset, batchsz, offset, datasz) in todolist: 474 | #indicies = np.random.choice(datasz, batchsz, replace=False) 475 | #indicies = indicies + offset 476 | for c,_id in enumerate(mylst[offset:(datasz+offset)]):# enumerate(indicies): 477 | #_id = lst.keys()[i] 478 | capimg = get_image_caption(_id,lst) 479 | #if c==0: 480 | # print("%s First Image Id %s with caption : %s " % (s,str(_id), capimg[0])) 481 | cset.append(capimg) 482 | if (c*100)%batchsz == 0: 483 | print("%s %d %% Loaded!" 
% (s, c*100/batchsz)) 484 | print("BS %d, VS %d " % (batch_size, val_size)) 485 | print("Shape of Training Set %s " % str(np.shape(train_set))) 486 | print("Shape of Validation Set %s " % str(np.shape(val_set))) 487 | logger.debug("Completed") 488 | return [train_set, val_set] 489 | 490 | ''' 491 | def train_generator(dataset): 492 | i = 0 493 | while i [caption] 66 | self.captions = dict() 67 | idcreated = set() 68 | # Training Set 69 | for sen in data_train['sentences']: 70 | _id = self.stringIdToInt(sen['video_id']) 71 | if _id not in idcreated: 72 | idcreated.add(_id) 73 | self.captions[_id] = [] 74 | self.captions[_id].append(sen['caption']) 75 | self.train_ids = list(idcreated) 76 | 77 | idcreated = set() 78 | # Test Set 79 | for sen in data_test['sentences']: 80 | _id = self.stringIdToInt(sen['video_id']) 81 | if _id not in idcreated: 82 | idcreated.add(_id) 83 | self.captions[_id] = [] 84 | self.captions[_id].append(sen['caption']) 85 | self.test_ids = list(idcreated) 86 | 87 | def set_vmodel(self,vmodel): 88 | self.vmodel = vmodel 89 | 90 | def getCaptionData(self): 91 | return self.captions 92 | 93 | def stringIdToInt(self,sid): 94 | assert(sid[:5]=='video') 95 | return int(sid[5:]) 96 | 97 | def getAllIds(self): 98 | return self.captions.keys() 99 | 100 | def getDownloadedIds(self): 101 | allfiles = os.listdir(self.vdir) 102 | vfiles = [] 103 | for f in allfiles: 104 | # Issue: getDownloadedIds is called before creation of *_ignore file 105 | # Program crases onces for creating these files then works normally 106 | if f.endswith(".mp4") and (not os.path.exists(os.path.join(self.vdir,f+"_ignore"))): 107 | if os.path.getsize("%s/%s" % (self.vdir,f)) >= VideoHandler.STHRES: 108 | vfiles.append(int(f[:-4])) 109 | return vfiles 110 | 111 | def filterMod100(self, parentlst, lst, _min, _max): 112 | parentlst = set(parentlst) 113 | lst = set(lst) 114 | flst = lst.intersection(parentlst) 115 | lst = list(flst) 116 | ids = [] 117 | for i,_id in enumerate(lst): 118 | if (i%100)>=_min and (i%100)<_max: 119 | ids.append(_id) 120 | return ids 121 | 122 | def getTrainingIds(self): 123 | return self.filterMod100(self.get_otrain_ids(), self.getDownloadedIds(), 0, self.splitTrainValid[0]) 124 | 125 | def getValidationIds(self): 126 | return self.filterMod100(self.get_otrain_ids(), self.getDownloadedIds(), self.splitTrainValid[0],100) 127 | 128 | def getTestIds(self): 129 | return self.filterMod100(self.get_otest_ids(), self.getDownloadedIds(), 0, 100) 130 | 131 | def get_otrain_ids(self): 132 | return self.train_ids 133 | 134 | def get_otest_ids(self): 135 | return self.test_ids 136 | 137 | def getYoutubeId(self,url): 138 | query = urllibparse.parse_qs(urllibparse.urlparse(url).query) 139 | print(query) 140 | return query['v'][0] 141 | 142 | def downloadVideo(self, _id, logs = True): 143 | video = self.vdata[_id] 144 | url = video['url'] 145 | stime = video['start time'] 146 | etime = video['end time'] 147 | sfname = "%s/%d.mp4" % (self.vdir, _id) 148 | if os.path.exists(sfname): 149 | if logs: 150 | print("Video Id [%d] Already Downloaded" % _id) 151 | return sfname 152 | youtubeId = self.getYoutubeId(url) 153 | turl = "curl 'https://hesetube.com/download.php?id=%s'" % (youtubeId) 154 | durl = "https://hesetube.com/video/%s.mp4?start=%f&end=%f" % (youtubeId, stime, etime) 155 | print(durl) 156 | print(turl) 157 | os.system(turl) 158 | cont = urllib.urlopen(durl).read() 159 | with open(sfname,"wb") as f: 160 | f.write(cont) 161 | print("Video Id [%d] Downloaded : %s " % (_id, youtubeId)) 162 | fs 
= os.path.getsize(sfname) 163 | if fs < VideoHandler.STHRES: 164 | print("Crosscheck failed, File Size : %d" % fs) 165 | with open(self.logfile,"a") as f: 166 | f.write("Crosscheck file %d, %s with size %d\n" % (_id, youtubeId, fs)) 167 | os.remove(sfname) 168 | open(sfname,'a').close() 169 | self.takebreak() 170 | return None 171 | else: 172 | self.takebreak() 173 | return sfname 174 | 175 | def takebreak(self): 176 | time.sleep(VideoHandler.SLEEPTIME) 177 | 178 | ''' 179 | Either frames of video from id or vfname 180 | ''' 181 | CRAZY = 0 182 | #@synchronized 183 | def get_crazy_id(self): 184 | VideoHandler.EXTRACT_COUNTER += 1 185 | return VideoHandler.EXTRACT_COUNTER 186 | 187 | def get_iframes_cached(self, _id): 188 | cfname = "%s/%d.npy" % (self.cdir, _id) 189 | if os.path.exists(cfname): 190 | f = open(cfname, 'rb') 191 | frames = np.load(f) 192 | assert len(frames) == self.LIMIT_FRAMES 193 | return frames 194 | return None 195 | 196 | def get_audio_cached(self, _id): 197 | afname = "%s/%d.npy" % (self.adir, _id) 198 | if os.path.exists(afname): 199 | f = open(afname, 'rb') 200 | feature = np.load(f) 201 | if np.shape(feature) != self.AUDIO_FEATURE: 202 | print("Feature Shape error at %d, %s" % (_id, np.shape(feature))) 203 | assert np.shape(feature) == self.AUDIO_FEATURE 204 | return feature 205 | return None 206 | 207 | def cached_iframe(self, _id, frames): 208 | cfname = "%s/%d.npy" % (self.cdir, _id) 209 | print("Cached %s" % cfname) 210 | with open(cfname, 'wb') as f: 211 | np.save(f,frames) 212 | 213 | def cached_audio(self, _id, feature): 214 | afname = "%s/%d.npy" % (self.adir, _id) 215 | print("Cached %s" % afname) 216 | with open(afname, 'wb') as f: 217 | np.save(f,feature) 218 | 219 | def file_to_videofeature(self, sfname): 220 | vcap = cv2.VideoCapture(sfname) 221 | success, frame = vcap.read() 222 | allframes = [] 223 | while True: 224 | success, frame = vcap.read() 225 | if not success: 226 | break 227 | allframes.append(cv2.resize(frame, VideoHandler.SHAPE)) 228 | if len(allframes) < self.LIMIT_FRAMES: 229 | print("File [%s] with limited frames (%d)" % (sfname, len(allframes))) 230 | # Ignore those videos 231 | os.system("touch %s_ignore" % sfname) 232 | return None 233 | 234 | period = len(allframes) // self.LIMIT_FRAMES 235 | rframes = allframes[:period * self.LIMIT_FRAMES:period] 236 | frames_out = self.vmodel.preprocess_partialmodel(rframes) 237 | return frames_out 238 | 239 | def file_to_audiofeature(self, sfname): 240 | audio_y, sr = librosa.load(sfname) 241 | afeatures = librosa.feature.mfcc(y=audio_y, sr=sr, n_mfcc=self.AUDIO_FEATURE[1]) 242 | afeatures = np.transpose(afeatures) 243 | ll = len(afeatures) 244 | parts = ll//self.AUDIO_FEATURE[0] 245 | division = [] 246 | for i in range(self.AUDIO_FEATURE[0] - 1): 247 | division.append((i+1)*parts) 248 | for i in range(ll%self.AUDIO_FEATURE[0]):#left over 249 | division[i]+=1 250 | afeatures = np.split(np.array(afeatures), division) 251 | afeature_out = [] 252 | for af in afeatures: 253 | afeature_out.append(np.mean(np.array(af),axis = 0)) 254 | afeature_out = np.asarray(afeature_out) 255 | if np.shape(afeature_out) != self.AUDIO_FEATURE: 256 | print("File [%s] with audio problem (%s)" % (sfname, str(np.shape(afeature_out)))) 257 | # Ignore videos 258 | os.system("touch %s_ignore" % sfname) 259 | return afeature_out 260 | 261 | # (Video Feature, Audio Feature) 262 | def get_iframes_audio(self, _id = None, sfname = None, logs = True, cache_id = None): 263 | assert (_id is None) ^ (sfname is None) 264 | # Load if 
cached 265 | frames_out = None 266 | afeature_out = None 267 | if _id is not None or cache_id is not None: 268 | if _id is not None: 269 | cache_id = _id 270 | frames_out = self.get_iframes_cached(cache_id) 271 | afeature_out = self.get_audio_cached(cache_id) 272 | if frames_out is not None and afeature_out is not None: 273 | return (frames_out, afeature_out) 274 | # Load frames from file 275 | if sfname is None: 276 | sfname = self.downloadVideo(_id, logs) 277 | if sfname is None: 278 | return None 279 | 280 | to_cache_video = False 281 | to_cache_audio = False 282 | 283 | if frames_out is None: 284 | frames_out = self.file_to_videofeature(sfname) 285 | to_cache_video = True 286 | 287 | if afeature_out is None: 288 | afeature_out = self.file_to_audiofeature(sfname) 289 | to_cache_audio = True 290 | 291 | # Cache it 292 | if _id is not None or cache_id is not None: 293 | if _id is not None: 294 | cache_id = _id 295 | if to_cache_video: 296 | self.cached_iframe(cache_id, frames_out) 297 | if to_cache_audio: 298 | self.cached_audio(cache_id, afeature_out) 299 | return (frames_out, afeature_out) 300 | 301 | def get_frames(self,_id = None, sfname = None, logs = True): 302 | assert (_id is None) ^ (sfname is None) 303 | if sfname is None: 304 | sfname = self.downloadVideo(_id, logs) 305 | if sfname is None: 306 | return None 307 | edir = "%s/v_%d" % (self.tdir, self.get_crazy_id()) 308 | if os.path.exists(edir): 309 | shutil.rmtree(edir) 310 | os.mkdir(edir) 311 | cmd = "ffmpeg -i %s -vf fps=%d -s %dx%d %s/0_%%03d.jpg &> /dev/null" % ( 312 | sfname, 5, VideoHandler.SHAPE[0], VideoHandler.SHAPE[1], edir) #&> /dev/null 313 | if logs: 314 | print(cmd) 315 | returnStatus = os.system(cmd) 316 | if returnStatus != 0: 317 | print("Extracting Failed : %s" % sfname) 318 | if os.path.exists(edir): 319 | print(cmd) 320 | print("Dir Exists") 321 | #shutil.rmtree(edir) 322 | return None 323 | files = os.listdir(edir) 324 | files = [("%s/%s"%(edir,f)) for f in files] 325 | LIMIT_FRAMES = 10 326 | if len(files) captions) 39 | logger.debug("Glove File %s\nEmbedding File %s\nVocab File %s\n" % (GLOVE_FILE, WORD_EMBEDDED_CACHE, VOCAB_FILE)) 40 | self.specialWords = dict() 41 | self.specialWords['START'] = '>' 42 | self.specialWords['END'] = '<' 43 | self.specialWords['NONE'] = '?!?' 
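# Special tokens: START ('>') prefixes decoder input, END ('<') marks the
# end of a caption, NONE ('?!?') pads to CAPTION_LEN, and EXTRA ('___',
# defined next) stands in for out-of-vocabulary words (see get_filteredword).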
44 | self.specialWords['EXTRA'] = '___' 45 | 46 | freshWordEmbedding = self.loadWordEmbedding(GLOVE_FILE) 47 | for word,enc in self.specialWords.items(): 48 | assert enc in self.wordEmbedding.keys() 49 | self.buildVocab(data, train_ids, freshWordEmbedding) 50 | logger.debug("Vocab Build Completed") 51 | 52 | def loadWordEmbedding(self, glove_file): 53 | self.wordEmbedding = loadFromPickleIfExists(WORD_EMBEDDED_CACHE) 54 | if self.wordEmbedding: 55 | logger.debug("Embedding Loaded") 56 | return False 57 | else: 58 | self.wordEmbedding = dict() 59 | with open(glove_file, 'r') as f: 60 | for i,line in enumerate(f): 61 | tokens = line.split() 62 | tokens = [tok.__str__() for tok in tokens] 63 | word = tokens[0] 64 | self.wordEmbedding[word] = np.asarray(tokens[1:], dtype='float32') 65 | minVal = float('inf') 66 | maxVal = -minVal 67 | for v in self.wordEmbedding.values(): 68 | for x in v: 69 | minVal = min(minVal,x) 70 | maxVal = max(maxVal,x) 71 | mapper = interp1d([minVal,maxVal],[-1,1]) 72 | logger.info("Mapping minVal[%f], maxVal[%f] to [-1,1] " % (minVal,maxVal)) 73 | for w in self.wordEmbedding: 74 | self.wordEmbedding[w] = mapper(self.wordEmbedding[w]) 75 | print("Cross Check") 76 | print(self.wordEmbedding['good']) 77 | self.saveEmbedding() 78 | return True 79 | 80 | def saveEmbedding(self): 81 | with open(WORD_EMBEDDED_CACHE, 'wb') as f: 82 | pickle.dump(self.wordEmbedding,f) 83 | logger.info("Embedding Saved!") 84 | 85 | def buildVocab(self, data, train_ids, trimEmbedding): 86 | self.ind2word = loadFromPickleIfExists(VOCAB_FILE) 87 | if not self.ind2word: 88 | logger.debug("Building Vocab") 89 | x = {} 90 | allWords = set() 91 | for w in self.wordEmbedding.keys(): 92 | allWords.add(w) 93 | logger.debug("Cached all Embedded Words") 94 | for _id,captions in data.items(): 95 | if _id not in train_ids: 96 | continue 97 | for cap in captions: 98 | for w in caption_tokenize(cap): 99 | if w not in allWords: 100 | continue 101 | if w not in x.keys(): 102 | x[w]=1 103 | else: 104 | x[w]+=1 105 | assert 'tshirt' not in x.keys() 106 | assert 'tshirt' not in allWords 107 | logger.debug("Iterated over all captions") 108 | self.ind2word = [] 109 | for w,enc in self.specialWords.items(): 110 | self.ind2word.append(enc) 111 | self.ind2word.extend([w for w in x.keys() if x[w]>=Vocab.WORD_MIN_FREQ]) 112 | with open(VOCAB_FILE,'wb') as f: 113 | pickle.dump(self.ind2word,f) 114 | logger.debug("Vocab File saved") 115 | logger.info("Vocab Size : %d"%len(self.ind2word)) 116 | self.word2ind = dict() 117 | for i,w in enumerate(self.ind2word): 118 | self.word2ind[w]=i 119 | assert 'tshirt' not in self.wordEmbedding.keys() 120 | assert 'tshirt' not in self.word2ind.keys() 121 | logger.debug("Words to be in vocab %d found %d" % (Vocab.VOCAB_SIZE, len(self.ind2word))) 122 | assert len(self.ind2word) == Vocab.VOCAB_SIZE 123 | if trimEmbedding: 124 | newEmbedding = dict() 125 | logger.debug("Trimming Word Embedding") 126 | for w in self.ind2word: 127 | newEmbedding[w] = self.wordEmbedding[w] 128 | self.wordEmbedding=newEmbedding 129 | logger.debug("Trimming Word Embedding Done") 130 | self.saveEmbedding() 131 | 132 | def get_filteredword(self,w): 133 | if w in self.word2ind.keys(): 134 | return w 135 | return self.specialWords['EXTRA'] 136 | 137 | def fit_caption_tokens(self,tokens,length,addPrefix,addSuffix): 138 | tok = [] 139 | tokens = tokens[0:length] 140 | if addPrefix: 141 | tok.append(self.specialWords['START']) 142 | tok.extend(tokens) 143 | if addSuffix: 144 | tok.append(self.specialWords['END']) 145 | 
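# Pad with NONE up to `length`; with exactly one of addPrefix/addSuffix set
# (as both call sites below use) the result has length+1 tokens, e.g.
# fit_caption_tokens(['a','dog'], 10, True, False)
# -> ['>', 'a', 'dog'] plus eight NONE ('?!?') tokens.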
for i in range(length-len(tokens)): 146 | tok.append(self.specialWords['NONE']) 147 | return tok 148 | 149 | def onehot_word(self,w): 150 | encode = [0] * Vocab.VOCAB_SIZE 151 | encode[self.word2ind[w]] = 1 152 | return encode 153 | 154 | def word_fromonehot(self, onehot): 155 | index = np.argmax(onehot) 156 | return self.ind2word[index] 157 | 158 | def get_caption_encoded(self,caption,glove, addPrefix, addSuffix): 159 | tokens = caption_tokenize(caption) 160 | tokens = self.fit_caption_tokens(tokens, Vocab.CAPTION_LEN, addPrefix, addSuffix) 161 | tokens = [self.get_filteredword(x) for x in tokens] 162 | # logger.debug("Working on Caption %s " % str(tokens)) 163 | if glove: 164 | return [self.wordEmbedding[x] for x in tokens] 165 | else: 166 | return [self.onehot_word(x) for x in tokens] 167 | 168 | def get_caption_from_indexs(self,indx): 169 | s = ' '.join([self.ind2word[x] for x in indx]) 170 | return s 171 | 172 | def vocabBuilder(): 173 | vHandler = VideoHandler(VideoHandler.s_fname_train, VideoHandler.s_fname_test) 174 | train_ids = vHandler.get_otrain_ids() 175 | captionData = vHandler.getCaptionData() 176 | vocab = Vocab(captionData, train_ids) 177 | return [vHandler, vocab] 178 | 179 | if __name__ == "__main__": 180 | vocabBuilder() -------------------------------------------------------------------------------- /src/backend/vpreprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import os 4 | 5 | from keras.preprocessing import image 6 | 7 | from common.config import get_vpreprocess_config 8 | from common.logger import logger 9 | from backend.vocab import vocabBuilder 10 | 11 | 12 | BADLOGS = get_vpreprocess_config()["LOGS_DIR"] 13 | 14 | def badLogs(msg): 15 | logger.debug(msg) 16 | with open(BADLOGS,"a") as f: 17 | f.write(msg) 18 | 19 | class Preprocessor: 20 | def __init__(self): 21 | self.vHandler,self.vocab = vocabBuilder() 22 | 23 | def set_vmodel(self, vmodel): 24 | self.vHandler.set_vmodel(vmodel) 25 | 26 | def imageToVec(self, fname): 27 | NEED_W = 224 28 | NEED_H = 224 29 | img = image.load_img(fname, target_size=(NEED_H, NEED_W)) 30 | x = image.img_to_array(img) 31 | x /= 255. 32 | x -= 0.5 33 | x *= 2. 
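# Pixels now lie in [-1, 1]: /255 maps to [0,1], -0.5 centers, *2 rescales.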
34 | return x 35 | 36 | ''' 37 | Either convert videos from ids or frame file names 38 | ''' 39 | COUNTER = 0 40 | def videoToVec(self, _id = None, vfname = None, cache_id = None): 41 | assert (_id is None) ^ (vfname is None) 42 | if not _id == None: 43 | out = self.vHandler.get_iframes_audio(_id = _id, logs = False) 44 | else: 45 | out = self.vHandler.get_iframes_audio(sfname = vfname, logs = False, cache_id = cache_id) 46 | if out is None: 47 | return None 48 | (frames, afeatures) = out 49 | return frames, afeatures 50 | # deprecated 51 | fnames = None 52 | edir = None 53 | if fnames is None: 54 | ef = self.vHandler.get_frames(_id = _id, logs = False) 55 | if ef is not None: 56 | edir, fnames = ef 57 | if fnames is None: 58 | return None 59 | content = [] 60 | for i,fname in enumerate(fnames): 61 | content.append(self.imageToVec(fname)) 62 | self.vHandler.free_frames(edir) 63 | 64 | #if len(fnames)>0: 65 | # os.system("cp \"%s\" ~/TESTING/%04d.jpg" % (fnames[0],Preprocessor.COUNTER)) 66 | # Preprocessor.COUNTER += 1 67 | return content 68 | 69 | def get_video_content(self, vfname, cache_id = None): 70 | return self.videoToVec(vfname = vfname, cache_id = cache_id) 71 | 72 | def get_video_caption(self, _id, just_one_caption = True): 73 | vid_a = self.videoToVec(_id = _id) 74 | if vid_a is None: 75 | return None 76 | (vid, afeature) = vid_a 77 | data = self.vHandler.getCaptionData() 78 | out = [] 79 | for cur_caption in data[_id]: 80 | captionIn = self.vocab.get_caption_encoded(cur_caption, True, True, False) 81 | captionOut = self.vocab.get_caption_encoded(cur_caption, False, False, True) 82 | out.append([afeature,vid,captionIn,captionOut]) 83 | if len(out) == 0: 84 | return None 85 | if just_one_caption: 86 | return [random.choice(out)] 87 | return out 88 | 89 | def datas_from_ids(self, idlst): 90 | logger.debug("\n Loading Video/Captions for ids : %s" % str(idlst)) 91 | afeatures = [] 92 | vids = [] 93 | capIn = [] 94 | capOut = [] 95 | for _id in idlst: 96 | vccs = self.get_video_caption(_id, just_one_caption = True) 97 | if vccs is None: 98 | continue 99 | for vcc in vccs: 100 | _afeature, _vid, _capIn, _capOut = vcc 101 | afeatures.append(_afeature) 102 | vids.append(_vid) 103 | capIn.append(_capIn) 104 | capOut.append(_capOut) 105 | afeatures = np.asarray(afeatures) 106 | capIn = np.asarray(capIn) 107 | capOut = np.asarray(capOut) 108 | vids = np.asarray(vids) 109 | 110 | logger.debug("Shape vids %s [max distinct %d]" % (str(np.shape(vids)),len(idlst))) 111 | logger.debug("Shape afeatures %s" % str(np.shape(afeatures))) 112 | logger.debug("Shape CapIn %s" % str(np.shape(capIn))) 113 | logger.debug("Shape CapOut %s" % str(np.shape(capOut))) 114 | 115 | 116 | return [[capIn,afeatures,vids],capOut] 117 | 118 | def get_nextbatch(self, batch_size, arr_counter, ids): 119 | assert len(ids) > 0 120 | count = arr_counter[0] 121 | start = (count * batch_size) % len(ids) 122 | idlst = [] 123 | for i in xrange(batch_size): 124 | idlst.append(ids[start]) 125 | start = (start + 1) % len(ids) 126 | 127 | count = (count +1 ) % len(ids) 128 | arr_counter[0] = count 129 | return idlst 130 | 131 | ''' 132 | typeSet 0:Training dataset, 1: Validation dataset, 2: Test Dataset 133 | ''' 134 | # Sequential 135 | def data_generator(self, batch_size, start=0, typeSet = 0): 136 | if typeSet == 0: 137 | ids = self.vHandler.getTrainingIds() 138 | elif typeSet == 1: 139 | ids = self.vHandler.getValidationIds() 140 | elif typeSet == 2: 141 | ids = self.vHandler.getTestIds() 142 | else: 143 | assert False 144 | 
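# ids are shuffled once up front; get_nextbatch then walks them
# sequentially with wrap-around, so each id is visited once per cycle.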
random.shuffle(ids) 145 | arr_counter = [0] 146 | count = (len(ids)+batch_size-1)//batch_size 147 | assert count > 0 148 | logger.debug("Max Batches of type %d : %d " % (typeSet, count)) 149 | while True: 150 | idlst = self.get_nextbatch(batch_size, arr_counter, ids) 151 | data = self.datas_from_ids(idlst) 152 | ndata = [] 153 | for d in data: 154 | if d is not None: 155 | ndata.append(d) 156 | if len(ndata) > 0: 157 | yield ndata 158 | 159 | # Random 160 | def data_generator_random(self, batch_size, start=0, typeSet = 0): 161 | if typeSet == 0: 162 | ids = self.vHandler.getTrainingIds() 163 | elif typeSet == 1: 164 | ids = self.vHandler.getValidationIds() 165 | elif typeSet == 2: 166 | ids = self.vHandler.getTestIds() 167 | else: 168 | assert False 169 | random.shuffle(ids) 170 | count = (len(ids) + batch_size - 1)//batch_size 171 | assert count > 0 172 | if start == -1: 173 | start = random.randint(0,count) 174 | logger.debug("Max Batches of type %d : %d " % (typeSet, count)) 175 | #start = start % count 176 | while True: 177 | bs = batch_size 178 | if bs>len(ids): 179 | bs=len(ids) 180 | logger.debug("FORCE Reducing Batch Size to %d from %d",bs,batch_size) 181 | idlst = random.sample(ids,bs) 182 | data = self.datas_from_ids(idlst) 183 | ndata = [] 184 | for d in data: 185 | if d is not None: 186 | ndata.append(d) 187 | if len(ndata) > 0: 188 | yield ndata 189 | #start = (start + 1)%count 190 | -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/common/__init__.py -------------------------------------------------------------------------------- /src/common/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration Parser for V2D 3 | """ 4 | 5 | import json 6 | import threading 7 | import os 8 | 9 | lock = threading.Lock() 10 | 11 | def get_config(): 12 | with lock: 13 | if hasattr(get_config, "config"): 14 | return get_config.config 15 | 16 | fname = os.environ.get("V2D_CONFIG_FILE", "config.json") 17 | with open(fname, "r") as fin: 18 | get_config.config = json.load(fin) 19 | return get_config.config 20 | 21 | def clear(): 22 | with lock: 23 | if hasattr(get_config, "config"): 24 | delattr(get_config, "config") 25 | 26 | def get_app_config(): 27 | return get_config()["app"] 28 | 29 | def get_rpc_config(): 30 | return get_config()["rpc"] 31 | 32 | def get_vpreprocess_config(): 33 | return get_config()["vpreprocess"] 34 | 35 | def get_vocab_config(): 36 | return get_config()["vocab"] 37 | 38 | def get_tests_config(): 39 | return get_config()["tests"] 40 | -------------------------------------------------------------------------------- /src/common/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger('root') 4 | FORMAT = "[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s" 5 | logging.basicConfig(format=FORMAT) 6 | logger.setLevel(logging.DEBUG) 7 | -------------------------------------------------------------------------------- /src/common/rpc.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import traceback 3 | 4 | from six.moves.xmlrpc_client import ServerProxy 5 | from six.moves.xmlrpc_server import SimpleXMLRPCServer 6 | 7 | from 
common.config import get_rpc_config 8 | from common.logger import logger 9 | 10 | 11 | CONFIG = get_rpc_config() 12 | SERVER_RUNAS = CONFIG["RPC_SERVER_RUNAS"] 13 | PORT = CONFIG["RPC_PORT"] 14 | SERVER_IP = CONFIG["RPC_ENDPOINT"] 15 | 16 | lock = threading.Lock() 17 | 18 | def rpc_decorator(f): 19 | def new_f(*args, **kwargs): 20 | try: 21 | return f(*args, **kwargs) 22 | except Exception as e: 23 | tb = traceback.format_exc() 24 | logger.error("Exception raised in rpc %s, %s\n%s" % (f, e, tb)) 25 | raise e 26 | return new_f 27 | 28 | def close_framework(): 29 | exit() 30 | 31 | def register_server(framework): 32 | print('Preparing for Register Server') 33 | server = SimpleXMLRPCServer((SERVER_RUNAS, PORT)) 34 | print('Listening to %d' % PORT) 35 | server.register_function(rpc_decorator(framework.predict_fnames), 'predict_fnames') 36 | server.register_function(rpc_decorator(framework.predict_ids), 'predict_ids') 37 | server.register_function(rpc_decorator(framework.get_weights_status), 'get_weights_status') 38 | server.register_function(rpc_decorator(close_framework), 'close_framework') 39 | print("[RPC][Server][Started]") 40 | try: 41 | server.serve_forever() 42 | except KeyboardInterrupt: 43 | raise 44 | except Exception: 45 | raise 46 | finally: 47 | print("[RPC][Server][Closing]") 48 | server.server_close() 49 | 50 | 51 | def get_rpc(): 52 | with lock: 53 | if hasattr(get_rpc, 'proxy'): 54 | return get_rpc.proxy 55 | get_rpc.proxy = ServerProxy("http://%s:%d/" % (SERVER_IP, PORT)) 56 | return get_rpc.proxy 57 | -------------------------------------------------------------------------------- /src/common/status.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class ModelWeightsStatus(Enum): 4 | NO_INFO = 0 5 | SUCCESS = 1 6 | MODEL_NOT_FOUND = 2 7 | WIP = 3 -------------------------------------------------------------------------------- /src/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": { 3 | "PREDICT_MODE_ONLY": true, 4 | "PREFIX":"/home/scopeinfinity/BTP/Video2Description/src/", 5 | "MAX_CONTENT_LENGTH": 10649600, 6 | "UPLOAD_FOLDER": "/home/scopeinfinity/BTP/Video2Description/uploads/", 7 | "VIDEOS_DATASET": "/home/scopeinfinity/BTP/Video2Description/dataset/", 8 | "VIDEOS_FOLDER": "/home/scopeinfinity/BTP/Video2Description/dataset/videos/", 9 | "DATASET_CACHE": "/home/scopeinfinity/BTP/Video2Description/dataset_cache/" 10 | }, 11 | "rpc": { 12 | "RPC_SERVER_RUNAS": "127.0.0.1", 13 | "RPC_PORT": 5001, 14 | "RPC_ENDPOINT": "127.0.0.1" 15 | }, 16 | "vpreprocess": { 17 | "COCOFNAME": "/home/scopeinfinity/cococaption/cocoeval.py", 18 | "LOGS_DIR": "/tmp/v2d_vpreprocess.log" 19 | }, 20 | "vocab": { 21 | "GLOVE_FILE": "/home/scopeinfinity/BTP/Video2Description/dataset/glove.6B.300d.txt", 22 | "WORD_EMBEDDED_CACHE": "/home/scopeinfinity/BTP/Video2Description/dataset_cache/glove_300.dat", 23 | "VOCAB_FILE": "/home/scopeinfinity/BTP/Video2Description/dataset_cache/vocab.dat" 24 | }, 25 | "tests": { 26 | "dir_videos": "./tests/data/videos" 27 | } 28 | } -------------------------------------------------------------------------------- /src/config_docker.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": { 3 | "PREDICT_MODE_ONLY": true, 4 | "PREFIX":"/home/si/v2d/src", 5 | "MAX_CONTENT_LENGTH": 10649600, 6 | "UPLOAD_FOLDER": "/home/si/v2d/uploads/", 7 | "VIDEOS_DATASET": "/home/si/v2d/dataset/", 8 | 
"VIDEOS_FOLDER": "/home/si/v2d/dataset/videos/", 9 | "DATASET_CACHE": "/home/si/v2d/dataset_cache/" 10 | }, 11 | "rpc": { 12 | "RPC_SERVER_RUNAS": "172.14.0.2", 13 | "RPC_PORT": 5001, 14 | "RPC_ENDPOINT": "172.14.0.2" 15 | }, 16 | "vpreprocess": { 17 | "COCOFNAME": "/home/si/coco-caption/pycocoevalcap/eval.py", 18 | "LOGS_DIR": "/var/log/v2d/vpreprocess.log" 19 | }, 20 | "vocab": { 21 | "GLOVE_FILE": "/home/si/v2d/dataset/glove.6B.300d.txt", 22 | "WORD_EMBEDDED_CACHE": "/home/si/v2d/dataset_cache/glove_300.dat", 23 | "VOCAB_FILE": "/home/si/v2d/dataset_cache/vocab.dat" 24 | }, 25 | "tests": { 26 | "dir_videos": "./tests/data/videos" 27 | } 28 | } -------------------------------------------------------------------------------- /src/frontend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/frontend/__init__.py -------------------------------------------------------------------------------- /src/frontend/app.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import random 5 | import re 6 | import traceback 7 | 8 | from copy import deepcopy 9 | from flask import Flask, render_template, request, send_from_directory 10 | from waitress import serve 11 | 12 | from common.config import get_app_config 13 | from common.rpc import get_rpc 14 | from common.status import ModelWeightsStatus 15 | 16 | app = Flask(__name__) 17 | config = get_app_config() 18 | 19 | PREDICT_MODE_ONLY = config["PREDICT_MODE_ONLY"] 20 | PREFIX = config["PREFIX"] 21 | app.config['MAX_CONTENT_LENGTH'] = config["MAX_CONTENT_LENGTH"] 22 | app.config['UPLOAD_FOLDER'] = config["UPLOAD_FOLDER"] 23 | app.config['VIDEOS_FOLDER'] = config["VIDEOS_FOLDER"] 24 | 25 | navigation = [("./","Predict",False)] 26 | 27 | if not PREDICT_MODE_ONLY: 28 | navigation.extend([("./get_ids","Get ID's",False),("./play","Play Videos",False)]) 29 | 30 | # Don't even define the methods! And definately need to improve this. 
31 | def get_train_ids(): 32 | command = "python %s/backend/videohandler.py -strain" % PREFIX 33 | return os.popen(command).read() 34 | 35 | def get_test_ids(): 36 | command = "python %s/backend/videohandler.py -stest" % PREFIX 37 | return os.popen(command).read() 38 | 39 | def get_val_ids(): 40 | command = "python %s/backend/videohandler.py -sval" % PREFIX 41 | return os.popen(command).read() 42 | 43 | def get_all_ids(): 44 | command = "python %s/backend/videohandler.py -sval -stest -strain" % PREFIX 45 | return os.popen(command).read() 46 | 47 | def predict_ids(ids): 48 | proxy = get_rpc() 49 | return proxy.predict_ids(ids) 50 | 51 | @app.route("/play") 52 | def play(): 53 | return render_template('play.html', navigation = getactivenav(2)) 54 | 55 | @app.route("/get_ids") 56 | def get_ids(): 57 | content = dict() 58 | content['ids'] = get_all_ids() 59 | return render_template('get_ids.html', navigation=getactivenav(1), content = content) 60 | 61 | @app.route("/predict") 62 | def predict_page(fnames = None): 63 | if request.args.get('fnames'): 64 | return computeAndRenderPredictionFnames(re.sub("[^0-9 ]", "", request.args.get('fnames'))) 65 | if (not PREDICT_MODE_ONLY) and request.args.get('ids'): 66 | return computeAndRenderPredictionIDs(ids = re.sub("[^0-9 ]", "", request.args.get('ids'))) 67 | return "Invalid Request" 68 | 69 | @app.route('/download', methods=['GET']) 70 | def download_file(): 71 | _id = request.args.get('id') 72 | if _id and unicode(_id).isnumeric(): 73 | fname = str(_id) + ".mp4" # serve the dataset video only if it is present on disk 74 | return send_from_directory(app.config['VIDEOS_FOLDER'], fname) if os.path.exists(os.path.join(app.config['VIDEOS_FOLDER'], fname)) else "File Not Exists" 75 | return "Invalid Request" 76 | 77 | def predict_fnames(fnames): 78 | proxy = get_rpc() 79 | return proxy.predict_fnames(fnames) 80 | 81 | def model_weights_notify(): 82 | proxy = get_rpc() 83 | try: 84 | status = proxy.get_weights_status() 85 | if status == str(ModelWeightsStatus.SUCCESS): 86 | return None 87 | return status 88 | except Exception as e: 89 | print("model_weights_notify failed: %s" % e) 90 | return "Failed to communicate." 91 | 92 | def getactivenav(index): 93 | nav = deepcopy(navigation) 94 | nav[index] = (nav[index][0], nav[index][1], True) 95 | return nav 96 | 97 | @app.route("/model_weights_status") 98 | def model_weights_status(): 99 | return model_weights_notify() or "[SUCCESS]" 100 | 101 | @app.route("/") 102 | def home(): 103 | weights_notify = model_weights_notify() 104 | if PREDICT_MODE_ONLY: 105 | return render_template( 106 | 'publicindex.html', 107 | weights_notify = weights_notify) 108 | else: 109 | return render_template('index.html', navigation = getactivenav(0)) 110 | 111 | def computeAndRenderPredictionIDs(ids): 112 | content = dict() 113 | content['ids'] = ids 114 | content['data_ids'] = predict_ids(ids) 115 | return render_template('predict.html', content = content) 116 | 117 | def computeAndRenderPredictionFnames(fnames): 118 | content = dict() 119 | content['fnames'] = fnames 120 | content['data_fnames'] = predict_fnames(fnames) 121 | return render_template('predict.html', content = content) 122 | 123 | # http://flask.pocoo.org/docs/0.12/patterns/fileuploads/ 124 | def allowed_file(filename): 125 | return '.'
in filename and \ 126 | filename.rsplit('.', 1)[1].lower() in ['mp4'] 127 | 128 | def error(msg): 129 | return json.dumps({'error':msg}) 130 | 131 | def success(data): 132 | return json.dumps({'success':data}) 133 | 134 | @app.route('/upload', methods=['POST']) 135 | def upload_file(): 136 | print(request.files) 137 | if request.method != "POST": 138 | return error("Only POST requests are expected!") 139 | if "file" not in request.files: 140 | return error("No files found!") 141 | file = request.files['file'] 142 | if not file: 143 | return error("No file found!") 144 | if file.filename == '': 145 | return error("No filename found!") 146 | if not allowed_file(file.filename): 147 | return error("Only *.mp4 video files are supported at this moment!") 148 | filename = str(random.randint(0,1000000)) + ".mp4" 149 | filename = os.path.join(app.config["UPLOAD_FOLDER"], filename) 150 | try: 151 | file.save(filename) 152 | print("File uploaded: %s" % filename) 153 | output = json.loads(predict_fnames([filename])) 154 | except Exception as e: 155 | print(traceback.format_exc()) 156 | return error("Request Failed! Exception caught while generating caption.") 157 | finally: 158 | os.unlink(filename) 159 | return success(output) 160 | 161 | def main(): 162 | parser = argparse.ArgumentParser() 163 | parser.add_argument('-p', '--port', type=int, default=5000) 164 | args = parser.parse_args() 165 | serve(app, host='0.0.0.0', port=args.port) 166 | 167 | if __name__ == "__main__": 168 | main() -------------------------------------------------------------------------------- /src/frontend/templates/get_ids.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 | 4 | Train / Validation / Test ID's 5 | {{ content['ids'] }} 6 | 7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /src/frontend/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 | 4 | Predict Using IDs 5 | 6 | 7 | 8 | Enter ID's (space separated) 9 | 10 | 11 | Submit 12 | 13 | 14 | 15 | 16 | 17 | Predict Using File Names 18 | 19 | 20 | 21 | Enter Filenames (space separated) 22 | 23 | 24 | Submit 25 | 26 | 27 | 28 | 29 | 30 | Predict via Uploading Media 31 | 32 | 33 | 34 | Upload File 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | {% endblock %} 43 | -------------------------------------------------------------------------------- /src/frontend/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Video2Description 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 24 | 25 | {% if navigation %} 26 | 27 | 28 | 29 | Video2Description 30 | 31 | 32 | {% for (link,title,active) in navigation %} 33 | {{ title }} 34 | {% endfor %} 35 | 36 | 37 | 38 | {% endif %} 39 | {% block content %}{% endblock %} 40 | 41 | 42 | -------------------------------------------------------------------------------- /src/frontend/templates/play.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 | 4 | 5 | Play Video 6 | 7 | 8 | Enter ID 9 | 10 | 11 | Play 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 34 | 35 | {% endblock %} 36 | -------------------------------------------------------------------------------- /src/frontend/templates/predict.html:
-------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 | {% if content['ids'] %} 4 | 5 | Processed {{ content['ids'] }} 6 | 7 | 8 | {{ content['data_ids'] }} 9 | 10 | 11 | 12 | {% endif %} 13 | {% if content['fnames'] %} 14 | 15 | Processed {{ content['fnames'] }} 16 | 17 | 18 | {{ content['data_fnames'] }} 19 | 20 | 21 | 22 | {% endif %} 23 | {% endblock %} 24 | -------------------------------------------------------------------------------- /src/frontend/templates/publicindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Video2Description 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | {% if weights_notify %} 13 | Backend not ready: {{ weights_notify }} 14 | {% endif %} 15 | 16 | 17 | Describe video using text? 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | Keep Smiling :) 26 | 27 | 28 | 29 | Description of video 30 | Please upload some file. 31 | 32 | 33 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /src/run_tests.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | python -m unittest discover tests/ -------------------------------------------------------------------------------- /src/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/__init__.py -------------------------------------------------------------------------------- /src/tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/data/__init__.py -------------------------------------------------------------------------------- /src/tests/data/fetcher.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from common.config import get_tests_config 4 | 5 | 6 | DIR_VIDEOS = get_tests_config()["dir_videos"] 7 | 8 | def get_videopath(fname): 9 | '''Returns the path of the given test video file.''' 10 | return os.path.join(DIR_VIDEOS, fname) -------------------------------------------------------------------------------- /src/tests/data/test_fetcher.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | 4 | from tests.data import fetcher 5 | 6 | 7 | class TestFetcher(unittest.TestCase): 8 | 9 | def test_get_videopath_success(self): 10 | path = fetcher.get_videopath(".content") 11 | self.assertTrue(os.path.exists(path)) 12 | with open(path, "r") as f: 13 | self.assertEqual("I_AM_VIDEO_TESTDATA_DIR", f.read().strip()) 14 | 15 | def test_get_videopath_failure(self): 16 | path = fetcher.get_videopath("bad_filename.mp4") 17 | self.assertFalse(os.path.exists(path)) 18 | 19 | 20 | if __name__ == '__main__': 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /src/tests/data/videos/.content: -------------------------------------------------------------------------------- 1 | I_AM_VIDEO_TESTDATA_DIR 2 | -------------------------------------------------------------------------------- /src/tests/data/videos/12727.mp4: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/data/videos/12727.mp4 -------------------------------------------------------------------------------- /src/tests/data/videos/12968.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/data/videos/12968.mp4 -------------------------------------------------------------------------------- /src/tests/env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/env/__init__.py -------------------------------------------------------------------------------- /src/tests/env/test_config.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | 4 | from common.config import clear, get_config 5 | 6 | class TestConfig(unittest.TestCase): 7 | 8 | def setUp(self): 9 | clear() 10 | if 'V2D_CONFIG_FILE' in os.environ: 11 | del os.environ['V2D_CONFIG_FILE'] 12 | 13 | def test_json(self): 14 | self.assertTrue(get_config()) 15 | 16 | def test_json_docker(self): 17 | os.environ['V2D_CONFIG_FILE'] = 'config_docker.json' 18 | self.assertTrue(get_config()) 19 | 20 | def test_json_bad_file(self): 21 | os.environ['V2D_CONFIG_FILE'] = 'config_bad_filename.json' 22 | with self.assertRaises(IOError): 23 | get_config() 24 | 25 | if __name__ == '__main__': 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /src/tests/env/test_video_parsing.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import unittest 3 | import os 4 | 5 | from tests.data import fetcher 6 | 7 | 8 | class TestVideoParsing(unittest.TestCase): 9 | 10 | def test_opencv_videocapture(self): 11 | path = fetcher.get_videopath("12727.mp4") 12 | self.assertTrue(os.path.exists(path)) 13 | vcap = cv2.VideoCapture(path) 14 | success_count = 0 15 | while True: 16 | success, _ = vcap.read() 17 | if not success: 18 | break 19 | success_count += 1 20 | self.assertGreater(success_count, 3*15) # at least ~3 seconds of frames at 15 fps 21 | self.assertLess(success_count, 15*30) # at most ~15 seconds of frames at 30 fps 22 | 23 | 24 | if __name__ == '__main__': 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/tests/e2e/__init__.py -------------------------------------------------------------------------------- /tests/e2e/test_external.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | import os 4 | 5 | from parameterized import parameterized 6 | from selenium.webdriver import Firefox 7 | from selenium.webdriver.firefox.options import Options 8 | from selenium.webdriver.common.by import By 9 | from selenium.webdriver.support import expected_conditions as EC 10 | from selenium.webdriver.support.ui import WebDriverWait 11 | 12 | WEB_URL = "http://localhost:8080" 13 | ROOT_PATH = "src" 14 | CONFIG_FILE = "src/config.json" 15 | 16 | 17 | class TestExternal(unittest.TestCase): 18 | """Tests from the perspective of an end user.""" 19 | 20 | def
setUp(self): 21 | options = Options() 22 | options.add_argument('-headless') 23 | self.driver = Firefox(options=options) 24 | 25 | def tearDown(self): 26 | self.driver.close() 27 | 28 | def get_video_path(self, fname): 29 | with open(CONFIG_FILE, "r") as fin: 30 | dir_videos = json.load(fin)["tests"]["dir_videos"] 31 | path = os.path.abspath( 32 | os.path.join(ROOT_PATH, dir_videos, fname)) 33 | self.assertTrue(os.path.exists(path)) 34 | return path 35 | 36 | @parameterized.expand([ 37 | ("12727.mp4", "two men are talking about a cooking show"), 38 | ("12968.mp4", "a woman is talking about a makeup face"), 39 | ]) 40 | def test_upload_and_verify(self, fname, caption): 41 | """ 42 | Tests uploading a video and verifies the response. 43 | Note: The expected captions are currently hard-coded for a specific 44 | trained model and might fail for other models. 45 | """ 46 | self.driver.get(WEB_URL) 47 | video_path = self.get_video_path(fname) 48 | text_vprocessing = "Video is being uploaded and processed" 49 | self.driver.find_element_by_xpath("//input[@type='file']").send_keys(video_path) 50 | self.assertNotIn(text_vprocessing, 51 | self.driver.find_element_by_id("notifications").text) 52 | self.driver.find_element_by_xpath("//input[@value='Upload Video']").click() 53 | self.assertIn(text_vprocessing, 54 | self.driver.find_element_by_id("notifications").text) 55 | WebDriverWait(self.driver, 120).until( 56 | EC.text_to_be_present_in_element( 57 | (By.ID, "notifications"), "Request Completed") 58 | ) 59 | self.assertIn(caption, self.driver.find_element_by_id("results").text.lower()) 60 | -------------------------------------------------------------------------------- /uploads/touched: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/uploads/touched --------------------------------------------------------------------------------
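Two short usage sketches follow; neither is a file in the repository. First, how the configuration layer in src/common/config.py selects its file: get_config() reads config.json from the current working directory unless the V2D_CONFIG_FILE environment variable points elsewhere, and caches the parsed dict on the function object until clear() is called. A minimal sketch, assuming it is run from the src/ directory:

import os

from common import config

# Point the loader at the Docker config before the first get_config() call;
# get_config() caches its result, so clear() forces a reload.
os.environ["V2D_CONFIG_FILE"] = "config_docker.json"
config.clear()
print(config.get_rpc_config()["RPC_PORT"])  # 5001 with the checked-in configs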
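Second, a minimal client sketch for the backend's XML-RPC interface registered by register_server in src/common/rpc.py, assuming a backend is already serving with the stock src/config.json values (endpoint 127.0.0.1, port 5001); the video path below is a placeholder:

from six.moves.xmlrpc_client import ServerProxy

# Endpoint and port mirror the "rpc" section of src/config.json.
proxy = ServerProxy("http://127.0.0.1:5001/")

# Methods registered by common.rpc.register_server:
print(proxy.get_weights_status())                         # model weights readiness
print(proxy.predict_fnames(["/path/to/some/video.mp4"]))  # captions for local files

The frontend's predict_fnames and predict_ids helpers in src/frontend/app.py reach the backend the same way, through the cached proxy returned by get_rpc().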