├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── labeler.yml └── workflows │ ├── ci.yml │ └── label.yml ├── .gitignore ├── LICENSE ├── README.md ├── REFERENCES.md ├── backend.Dockerfile ├── dataset ├── test_videodatainfo_2017.json ├── videodatainfo_2017.json └── videos │ └── README.md ├── dataset_cache └── README.md ├── docker-compose.yml ├── docker_build.sh ├── docker_hub.sh ├── docker_test.sh ├── early_tests.sh ├── environment.yml ├── f5c22f7_images ├── 10802.gif ├── 12501.gif ├── 12589.gif ├── 12683.gif ├── 12727.gif ├── 12901.gif ├── 12908.gif ├── 12937.gif ├── 12939.gif ├── 12966.gif ├── 12968.gif └── 12994.gif ├── frontend.Dockerfile ├── images ├── attention.png ├── model_audio.png ├── model_video.png ├── model_word.png └── sentence_model.png ├── keep_alive.sh ├── models └── README.md ├── requirements.txt ├── src ├── __init__.py ├── backend │ ├── CombinedResults │ │ ├── calculate_score_results.sh │ │ ├── calculate_total_score_json.py │ │ ├── find_total_sentences_unique.py │ │ └── summary.sh │ ├── __init__.py │ ├── data.py │ ├── framework.py │ ├── model.py │ ├── parser.py │ ├── plotepochlog.py │ ├── pred.sh │ ├── preprocess.py │ ├── run.sh │ ├── test_on_trained.sh │ ├── train.py │ ├── utils.py │ ├── videohandler.py │ ├── vocab.py │ └── vpreprocess.py ├── common │ ├── __init__.py │ ├── config.py │ ├── logger.py │ ├── rpc.py │ └── status.py ├── config.json ├── config_docker.json ├── frontend │ ├── __init__.py │ ├── app.py │ └── templates │ │ ├── get_ids.html │ │ ├── index.html │ │ ├── layout.html │ │ ├── play.html │ │ ├── predict.html │ │ └── publicindex.html ├── run_tests.sh └── tests │ ├── __init__.py │ ├── data │ ├── __init__.py │ ├── fetcher.py │ ├── test_fetcher.py │ └── videos │ │ ├── .content │ │ ├── 12727.mp4 │ │ └── 12968.mp4 │ └── env │ ├── __init__.py │ ├── test_config.py │ └── test_video_parsing.py ├── tests └── e2e │ ├── __init__.py │ └── test_external.py └── uploads └── touched /.dockerignore: -------------------------------------------------------------------------------- 1 | .git -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Sample video** 24 | Provide a sample video (if applicable). 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Desktop (please complete the following information):** 30 | - OS: [e.g. iOS] 31 | - Docker Version 32 | - Docker Compose Version 33 | - Browser [e.g. chrome, safari] 34 | - Version [e.g. 22] 35 | 36 | **Additional context** 37 | Add any other context about the problem here. 
38 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | env: 2 | - Dockerfile 3 | - environment.yml 4 | - requirements.txt 5 | 6 | config: 7 | - src/config.json 8 | - src/config_docker.json 9 | 10 | test: 11 | - tests/* 12 | - tests/**/* 13 | - src/tests/* 14 | - src/tests/**/* 15 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - VideoCaption 7 | - actions 8 | - speedo 9 | 10 | pull_request: 11 | branches: 12 | - VideoCaption 13 | 14 | schedule: 15 | - cron: "11 7 1 * *" 16 | 17 | workflow_dispatch: 18 | 19 | jobs: 20 | build: 21 | runs-on: ubuntu-latest 22 | strategy: 23 | matrix: 24 | python-version: [2.7] 25 | 26 | steps: 27 | - uses: actions/checkout@v2 28 | 29 | - name: Docker Pull 30 | run: | 31 | bash docker_hub.sh pull 32 | 33 | - name: Install dependencies 34 | run: | 35 | pip install -r requirements.txt 36 | bash early_tests.sh 37 | sudo apt-get install firefox 38 | wget 'https://github.com/mozilla/geckodriver/releases/download/v0.30.0/geckodriver-v0.30.0-linux64.tar.gz' 39 | tar -xvzf geckodriver-v0.30.0-linux64.tar.gz 40 | sudo mv geckodriver /usr/local/bin 41 | 42 | - name: Build 43 | run: | 44 | bash docker_build.sh 45 | 46 | - name: Test 47 | run: | 48 | bash docker_test.sh 49 | 50 | # Push the image to Docker Hub on 'push' and 'workflow_dispatch' events only. 51 | - name: Deploy to Docker Hub 52 | if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }} 53 | env: 54 | DOCKER_USERNAME: "scopeinfinity" 55 | DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} 56 | run: | 57 | { echo "$DOCKER_TOKEN" | docker login -u "$DOCKER_USERNAME" --password-stdin ; } && bash docker_hub.sh push 58 | -------------------------------------------------------------------------------- /.github/workflows/label.yml: -------------------------------------------------------------------------------- 1 | # This workflow will triage pull requests and apply a label based on the 2 | # paths that are modified in the pull request. 3 | # 4 | # To use this workflow, you will need to set up a .github/labeler.yml 5 | # file with configuration.
For more information, see: 6 | # https://github.com/actions/labeler/blob/master/README.md 7 | 8 | name: Labeler 9 | on: [pull_request] 10 | 11 | jobs: 12 | label: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/labeler@v2 18 | with: 19 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Other Git Projects 2 | FFmpeg/ 3 | 4 | # Learning Session Files 5 | *_state.txt 6 | *_model.dat 7 | *_logs.txt 8 | *_model.dat_*_loss_* 9 | *_logs_epoch_.txt 10 | *_logs_.txt 11 | model_*.dat* 12 | state_*.txt 13 | 14 | # Local Files to ignore 15 | display 16 | *.mp4 17 | *.mp4_ 18 | 19 | # Backup Files 20 | *~ 21 | *# 22 | .#* 23 | 24 | # Files taken from other source 25 | 26 | glove/* 27 | models/* 28 | 29 | # Dynamically Generated Files 30 | vocab.dat 31 | glove.dat 32 | log.txt 33 | 34 | # Byte-compiled / optimized / DLL files 35 | __pycache__/ 36 | *.py[cod] 37 | *$py.class 38 | 39 | # C extensions 40 | *.so 41 | 42 | # Distribution / packaging 43 | .Python 44 | env/ 45 | build/ 46 | develop-eggs/ 47 | dist/ 48 | downloads/ 49 | eggs/ 50 | .eggs/ 51 | lib/ 52 | lib64/ 53 | parts/ 54 | sdist/ 55 | var/ 56 | wheels/ 57 | *.egg-info/ 58 | .installed.cfg 59 | *.egg 60 | 61 | # PyInstaller 62 | # Usually these files are written by a python script from a template 63 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 64 | *.manifest 65 | *.spec 66 | 67 | # Installer logs 68 | pip-log.txt 69 | pip-delete-this-directory.txt 70 | 71 | # Unit test / coverage reports 72 | htmlcov/ 73 | .tox/ 74 | .coverage 75 | .coverage.* 76 | .cache 77 | nosetests.xml 78 | coverage.xml 79 | *.cover 80 | .hypothesis/ 81 | 82 | # Translations 83 | *.mo 84 | *.pot 85 | 86 | # Django stuff: 87 | *.log 88 | local_settings.py 89 | 90 | # Flask stuff: 91 | instance/ 92 | .webassets-cache 93 | 94 | # Scrapy stuff: 95 | .scrapy 96 | 97 | # Sphinx documentation 98 | docs/_build/ 99 | 100 | # PyBuilder 101 | target/ 102 | 103 | # Jupyter Notebook 104 | .ipynb_checkpoints 105 | 106 | # pyenv 107 | .python-version 108 | 109 | # celery beat schedule file 110 | celerybeat-schedule 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # dotenv 116 | .env 117 | 118 | # virtualenv 119 | .venv 120 | venv/ 121 | ENV/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Video Captioning [](https://travis-ci.com/scopeInfinity/Video2Description) [](https://hits.seeyoufarm.com) 2 | Generate a caption for a given video clip 3 | 4 | Branch : [VideoCaption](https://github.com/scopeInfinity/Video2Description/tree/VideoCaption) (1a2124d), [VideoCaption_catt](https://github.com/scopeInfinity/Video2Description/tree/VideoCaption_catt) (647e73b4) 5 | 6 | ### Model 7 | 8 | The model generates a natural-language sentence word by word 9 | 10 |  11 | 12 | | Audio SubModel | Video SubModel | Sentence Generation SubModel | 13 | | :-------------: |:-------------:| :-----:| 14 | | ![audio_model][audio_model]| ![video_model][video_model] | ![sentence_generation][sentence_generation] 15 | 16 | [audio_model]: https://github.com/scopeInfinity/Video2Description/raw/VideoCaption/images/model_audio.png 17 | [video_model]: https://github.com/scopeInfinity/Video2Description/raw/VideoCaption/images/model_video.png 18 | [sentence_generation]: https://github.com/scopeInfinity/Video2Description/raw/VideoCaption/images/model_word.png 19 | 20 | Context extraction for the Temporal Attention Model, at the i-th word generation step 21 | 22 |  23 | 24 | 25 | ### Results - *f5c22f7* 26 | 27 | Test videos with good results 28 | 29 | | | | | 30 | | :-------------: |:-------------:| :-----:| 31 | | ![12727][12727]| ![12501][12501] | ![10802][10802] 32 | | two men are talking about a cooking show | a woman is cooking | a dog is running around a field | 33 | | ![12968][12968] | ![12937][12937] | ![12939][12939] 34 | | a woman is talking about a makeup face | a man is driving a car down the road | a man is cooking in a kitchen 35 | | ![12683][12683] | ![12901][12901] | ![12994][12994] 36 | | a man is playing a video game | two men are playing table tennis in a stadium | a man is talking about a computer program 37 | 38 | 39 | Test videos with poor results 40 | 41 | | | | | 42 | | :-------------: |:-------------:| :-----:| 43 | | ![12589][12589]| ![12966][12966] | ![12908][12908] 44 | | a person is playing with a toy | a man is walking on the field | a man is standing in a gym | 45 | 46 | [12727]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12727.gif 47 | [12501]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12501.gif 48 | [10802]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/10802.gif 49 | 50 | [12968]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12968.gif 51 | [12937]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12937.gif 52 | [12939]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12939.gif 53 | 54 | [12683]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12683.gif 55 |
[12901]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12901.gif 56 | [12994]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12994.gif 57 | 58 | 59 | [12589]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12589.gif 60 | [12966]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12966.gif 61 | [12908]: https://raw.githubusercontent.com/scopeInfinity/Video2Description/VideoCaption/f5c22f7_images/12908.gif 62 | 63 | 64 | ### Try it out!!! 65 | * Please feel free to raise a PR with necessary suggestions. 66 | * Clone the repository 67 | * `git clone https://github.com/scopeInfinity/Video2Description.git` 68 | * Install docker and docker-compose 69 | * The current config uses docker-compose file format '3.2'. 70 | * https://github.com/docker/compose/releases 71 | * ```bash 72 | sudo apt-get install docker.io 73 | sudo curl -L "https://github.com/docker/compose/releases/download/1.25.4/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose 74 | sudo chmod +x /usr/local/bin/docker-compose 75 | ``` 76 | * docs 77 | * https://docs.docker.com/install/linux/docker-ce/ubuntu/ 78 | * https://docs.docker.com/compose/install/ 79 | 80 | * Pull the prebuilt images and run the containers 81 | ```bash 82 | $ docker-compose pull 83 | $ docker-compose up 84 | ``` 85 | * Browse to `http://localhost:8080/` 86 | * The backend might take a few minutes to reach a stable state. 87 | 88 | ##### Execution without Docker 89 | * We can always go through `backend.Dockerfile` and `frontend.Dockerfile` to understand the setup better. 90 | * Update `src/config.json` as per the requirements and use those paths during the upcoming steps. 91 | * To know more about any field, just search for its references in the codebase. 92 | * Install miniconda 93 | * https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html 94 | * Get `glove.6B.300d.txt` from `https://nlp.stanford.edu/projects/glove/` 95 | * Install ffmpeg 96 | * Configure, build and install ffmpeg from source with shared libraries 97 | ```bash 98 | $ git clone 'https://github.com/FFmpeg/FFmpeg.git' 99 | $ cd FFmpeg 100 | $ ./configure --enable-shared # Use --prefix if you need to install into a custom directory 101 | $ make 102 | # make install 103 | ``` 104 | * If required, use `https://github.com/tylin/coco-caption/` for scoring the model. 105 | * Then create the conda environment using `environment.yml` 106 | * `$ conda env create -f environment.yml` 107 | * And activate the environment 108 | ``` 109 | $ conda activate V2D 110 | ``` 111 | * Start the backend 112 | * `src$ python -m backend.parser server --start --model /path/to/model` 113 | * Start the web frontend 114 | * `src$ python -m frontend.app` 115 | 116 | ### Info 117 | 118 | Data Directory and Working Directory can be the same as the project root directory.
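For illustration only, here is a rough sketch of how these directories surface in code. The `get_app_config`/`get_vpreprocess_config` helpers come from `src/common/config.py`, and the `DATASET_CACHE` and `COCOFNAME` keys are the ones read in `src/backend/framework.py`; treating `V2D_CONFIG_FILE` as the switch between `config.json` and `config_docker.json` is an assumption based on the `ENV V2D_CONFIG_FILE=...` lines in the Dockerfiles.

```python
# Illustrative sketch; run from src/. Key names are taken from
# src/backend/framework.py; the V2D_CONFIG_FILE default below is an
# assumption based on the Dockerfiles (config_docker.json inside Docker).
import os
os.environ.setdefault("V2D_CONFIG_FILE", "config.json")

from common.config import get_app_config, get_vpreprocess_config

print(get_app_config()["DATASET_CACHE"])      # cache dir for weights/state/log files
print(get_vpreprocess_config()["COCOFNAME"])  # presumably the coco-caption path used for scoring
```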
119 | 120 | ### Data Directory 121 | File | Reference 122 | --- | --- 123 | */path/to/data_dir/VideoDataset/videodatainfo_2017.json* | http://ms-multimedia-challenge.com/2017/dataset 124 | */path/to/data_dir/VideoDataset/videos/[0-9]+.mp4* | Download videos based on the above dataset 125 | */path/to/data_dir/glove/glove.6B.300d.txt* | https://nlp.stanford.edu/projects/glove/ 126 | */path/to/data_dir/VideoDataset/cache_40_224x224/[0-9]+.npy* | Video cache files will be created on the fly 127 | 128 | ### Working Directory 129 | File | Content 130 | --- | --- 131 | */path/to/working_dir/glove.dat* | Pickle-dumped GloVe embedding 132 | */path/to/working_dir/vocab.dat* | Pickle-dumped vocabulary words 133 | 134 | ### Download Dataset 135 | * Execute `python videohandler.py` from the *VideoDataset* directory 136 | 137 | ### Execution 138 | It currently supports train, predict and server modes. Please use the following command for a detailed explanation. 139 | ```bash 140 | src$ python -m backend.parser -h 141 | ``` 142 | 143 | ### Training Methods 144 | 145 | * Try Iterative Learning 146 | * Try Random Learning 147 | 148 | ### Evaluation 149 | 150 | #### Prerequisite 151 | ```bash 152 | cd /path/to/eval_dir/ 153 | git clone 'https://github.com/tylin/coco-caption.git' cococaption 154 | ln /path/to/working_dir/cocoeval.py cococaption/ 155 | ``` 156 | #### Evaluate 157 | ```bash 158 | # One can edit parser.py to change the number of test examples considered in evaluation 159 | python parser.py predict save_all_test 160 | python /path/to/eval_dir/cocoeval.py <results_file>.txt 161 | ``` 162 | 163 | #### Sample Evaluation while training 164 | 165 | Commit | Training | Total | CIDEr | Bleu_4 | ROUGE_L | METEOR | Model Filename 166 | --- | --- | --- | --- | --- | --- | --- | --- 167 | 647e73b4 | 10 epochs | 1.1642 | 0.1580 | 0.3090 | 0.4917 | 0.2055 | CAttention_ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4990_loss_2.484_Cider0.360_Blue0.369_Rouge0.580_Meteor0.256 168 | 1a2124d | 17 epochs | 1.1599 | 0.1654 | 0.3022 | 0.4849 | 0.2074 | ResNet_D512L512_G128G64_D1024D0.20BN_BDLSTM1024_D0.2L1024DVS_model.dat_4987_loss_2.203_Cider0.342_Blue0.353_Rouge0.572_Meteor0.256 169 | f5c22f7 | 17 epochs | 1.1559 | 0.1680 | 0.3000 | 0.4832 | 0.2047 | ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4983_loss_2.350_Cider0.355_Blue0.353_Rouge0.571_Meteor0.247_TOTAL_1.558_BEST 170 | bd072ac | 11 CPUhrs with Multiprocessing (16 epochs) | 1.0736 | 0.1528 | 0.2597 | 0.4674 | 0.1936 | ResNet_D512L512_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4986_loss_2.306_Cider0.347_Blue0.328_Rouge0.560_Meteor0.246 171 | 3ccf5d5 | 15 CPUhrs | 1.0307 | 0.1258 | 0.2535 | 0.4619 | 0.1895 | res_mcnn_rand_b100_s500_model.dat_model1_3ccf5d5 172 | 173 | Check the `Specifications` section for a model comparison. 174 | 175 | 176 | The temporal attention model is on the `VideoCaption_catt` branch. 177 | 178 | Pre-trained Models : https://drive.google.com/open?id=1gexBRQfrjfcs7N5UI5NtlLiIR_xa69tK 179 | 180 | ### Web Server 181 | 182 | - Start the server **(S)** to compute predictions (within the conda environment) 183 | ```bash 184 | python parser.py server -s -m <path/to/model> 185 | ``` 186 | - Check `config.json` for configurations.
187 | - Execute `python app.py` from the webserver directory (no need for the conda environment) 188 | - Make sure the process can create new files inside `$UPLOAD_FOLDER` 189 | - Open `http://webserver:5000/` to access the web server for testing (under the default configuration) 190 | 191 | ### Specifications 192 | 193 | ##### Commit: 3ccf5d5 194 | - ResNet over LSTM for feature extraction 195 | - Word by Word generation based on last prediction for Sentence Generation using LSTM 196 | - Random Dataset Learning of training data 197 | - Vocab Size 9448 198 | - Glove of 300 Dimension 199 | 200 | ##### Commit: bd072ac 201 | - ResNet over BiDirection GRU for feature extraction 202 | - Sequential Learning of training data 203 | - Batch Normalization + Few more tweaks in Model 204 | - Bleu, CIDEr, Rouge, Meteor score generation for validation 205 | - Multiprocessing keras 206 | 207 | ##### Commit: f5c22f7 208 | - Audio with BiDirection GRU 209 | 210 | ##### Commit: 1a2124d 211 | - Audio with BiDirection LSTM 212 | 213 | ##### Commit: 647e73b 214 | - Audio with BiDirection GRU using temporal attention for context 215 | 216 | # Image Captioning 217 | Generate captions for the given images 218 | 219 | Branch : [onehot_gen](https://github.com/scopeInfinity/Video2Description/tree/onehot_gen) 220 | 221 | Commit : [898f15778d40b67f333df0a0e744a4af0b04b16c](https://github.com/scopeInfinity/Video2Description/commit/898f15778d40b67f333df0a0e744a4af0b04b16c) 222 | 223 | Trained Model : https://drive.google.com/open?id=1qzMCAbh_tW3SjMMVSPS4Ikt6hDnGfhEN 224 | 225 | Categorical Crossentropy Loss : 0.58 226 | 227 | -------------------------------------------------------------------------------- /REFERENCES.md: -------------------------------------------------------------------------------- 1 | [1] J. Xu, T. Mei, T. Yao, Y. Rui. MSR-VTT: A Large Video Description Dataset for 2 | Bridging Video and Language. In Proceedings of CVPR, 2016. 3 | 4 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 5 | Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer 6 | Vision and Pattern Recognition (CVPR) 7 | 8 | [3] Jeffrey Pennington, Richard Socher, and Christopher D. Manning. GloVe: Global 9 | Vectors for Word Representation (2014) 10 | 11 | [4] McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric 12 | Battenberg, and Oriol Nieto. librosa: Audio and music signal analysis in python. In 13 | Proceedings of the 14th python in science conference, pp. 18-25. 2015 14 | 15 | [5] Oriol Vinyals, Alexander Toshev, Samy Bengio, Dumitru Erhan. Show and Tell: A 16 | Neural Image Caption Generator (April 2015) 17 | 18 | [6] Jeff Donahue, Lisa Anne Hendricks, Marcus Rohrbach, Subhashini Venugopalan, Sergio 19 | Guadarrama, Kate Saenko, Trevor Darrell. Long-term Recurrent Convolutional Networks 20 | for Visual Recognition and Description (Nov 2014) 21 | 22 | [7] Subhashini Venugopalan, Marcus Rohrbach, Jeff Donahue, Raymond Mooney, Trevor 23 | Darrell, and Kate Saenko. Sequence to Sequence Video to Text (May 24 | 2015) 25 | 26 | [8] Ilya Sutskever, Oriol Vinyals, Quoc V. Le. Sequence to Sequence Learning with Neural 27 | Networks (Sep 2014) 28 | 29 | [9] Ramakrishna Vedantam, C.
Lawrence Zitnick, and Devi Parikh. CIDEr: Consensus-based 30 | Image Description Evaluation (The Computer Vision Foundation, 2015) 31 | 32 | [10] Alon Lavie and Michael Denkowski. The METEOR Metric for Automatic Evaluation 33 | of Machine Translation. Machine Translation, 2010 34 | 35 | [11] Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. BLEU: a method for 36 | automatic evaluation of machine translation. ACL ’02 Proceedings of the 40th Annual 37 | Meeting on Association for Computational Linguistics, Pages 311-318 (2002) 38 | 39 | [12] Lin, C.Y. ROUGE: A package for automatic evaluation of summaries. In Text 40 | Summarization Branches Out: Proceedings of the ACL-04 Workshop (pp. 74-81) (2004, July) 41 | -------------------------------------------------------------------------------- /backend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:xenial as my_base 2 | RUN apt-get update 3 | RUN apt-get install -y libsamplerate0 curl libsndfile1 pkg-config nasm wget zip 4 | 5 | FROM my_base as ffmpeg_builder 6 | WORKDIR /tmp 7 | RUN wget https://github.com/FFmpeg/FFmpeg/archive/master.zip -O ffmpeg.zip 8 | RUN unzip ffmpeg.zip 9 | RUN rm ffmpeg.zip 10 | WORKDIR /tmp/FFmpeg-master/ 11 | RUN ./configure --enable-shared 12 | RUN make -j32 13 | 14 | 15 | FROM my_base as glove_builder 16 | WORKDIR /tmp 17 | # https://nlp.stanford.edu/projects/glove/ 18 | RUN wget http://nlp.stanford.edu/data/glove.6B.zip && \ 19 | unzip glove.6B.zip glove.6B.300d.txt && \ 20 | rm glove.6B.zip 21 | 22 | 23 | FROM my_base as deploy 24 | # FROM conda/miniconda2 25 | RUN apt-get update 26 | RUN apt-get install -y libsamplerate0 curl libsndfile1 pkg-config nasm wget zip 27 | RUN useradd -m -s /bin/bash si 28 | RUN mkdir /var/log/v2d 29 | RUN chown si:si /var/log/v2d 30 | RUN chmod 700 /var/log/v2d 31 | USER si 32 | 33 | # Installing miniconda 34 | RUN wget -N https://repo.anaconda.com/miniconda/Miniconda2-latest-Linux-x86_64.sh -O /tmp/Miniconda2-latest-Linux-x86_64.sh 35 | RUN bash /tmp/Miniconda2-latest-Linux-x86_64.sh -b 36 | RUN rm /tmp/Miniconda2-latest-Linux-x86_64.sh 37 | USER root 38 | RUN ln -s /home/si/miniconda2/bin/conda /usr/bin/ 39 | USER si 40 | 41 | # glove 42 | RUN mkdir -p /home/si/v2d/dataset 43 | WORKDIR /home/si/v2d/dataset 44 | COPY --from=glove_builder /tmp/glove.6B.300d.txt /home/si/v2d/dataset/glove.6B.300d.txt 45 | 46 | # ffmpeg build and install 47 | COPY --from=ffmpeg_builder /tmp/FFmpeg-master/ /tmp/FFmpeg-master/ 48 | WORKDIR /tmp/FFmpeg-master/ 49 | USER root 50 | RUN make install 51 | USER si 52 | RUN echo 'export LD_LIBRARY_PATH=/usr/local/lib' >> /home/si/.bashrc 53 | 54 | # coco-caption 55 | WORKDIR /home/si 56 | RUN wget -N 'https://github.com/tylin/coco-caption/archive/master.zip' -O coco.zip && \ 57 | unzip coco.zip && \ 58 | mv coco-caption-master coco-caption && \ 59 | rm coco.zip 60 | 61 | # Create conda environment 62 | # Note: ffmpeg with --enable-shared should be before installing opencv 63 | WORKDIR /home/si/v2d/ 64 | COPY --chown=si:si environment.yml /home/si/v2d/ 65 | RUN conda env create -f environment.yml 66 | RUN conda init bash 67 | RUN echo "conda activate V2D" >> /home/si/.bashrc 68 | 69 | # Prepare basic files 70 | ENV V2D_CONFIG_FILE=config_docker.json 71 | RUN mkdir -p /home/si/v2d/dataset 72 | RUN mkdir -p /home/si/v2d/dataset_cache 73 | RUN mkdir -p /home/si/v2d/models 74 | RUN mkdir -p /tmp/v2d/app/uploads 75 | COPY --chown=si:si dataset/videodatainfo_2017.json /home/si/v2d/dataset/ 76 | COPY
--chown=si:si dataset/test_videodatainfo_2017.json /home/si/v2d/dataset/ 77 | COPY --chown=si:si src/ /home/si/v2d/src/ 78 | WORKDIR /home/si/v2d/src 79 | 80 | # Prepares cache for pretrained model 81 | COPY --chown=si:si models/ /home/si/v2d/models/ 82 | WORKDIR /home/si/v2d/models/ 83 | RUN wget -q -N 'https://github.com/scopeInfinity/Video2Description/releases/download/models/ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4983_loss_2.350_Cider0.355_Blue0.353_Rouge0.571_Meteor0.247_TOTAL_1.558_BEST' 84 | RUN echo "Available Models:" 85 | RUN ls -1 /home/si/v2d/models 86 | 87 | WORKDIR /home/si/v2d/src/ 88 | RUN conda run -n V2D python -m backend.parser server --init-only -m /home/si/v2d/models/ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4983_loss_2.350_Cider0.355_Blue0.353_Rouge0.571_Meteor0.247_TOTAL_1.558_BEST 89 | -------------------------------------------------------------------------------- /dataset/videos/README.md: -------------------------------------------------------------------------------- 1 | Download dataset videos here -------------------------------------------------------------------------------- /dataset_cache/README.md: -------------------------------------------------------------------------------- 1 | Directory to store dataset cache 2 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.2' 2 | services: 3 | backend: 4 | image: scopeinfinity/video2description:deploy 5 | environment: 6 | V2D_CONFIG_FILE: config_docker.json 7 | networks: 8 | internal: 9 | ipv4_address: 172.14.0.2 10 | volumes: 11 | - uploads:/home/si/v2d/uploads/:ro 12 | - ./src:/home/si/v2d/src/:ro 13 | entrypoint: /bin/bash -i -c 'python -m backend.parser server -s -m /home/si/v2d/models/ResNet_D512L512_G128G64_D1024D0.20BN_BDGRU1024_D0.2L1024DVS_model.dat_4983_loss_2.350_Cider0.355_Blue0.353_Rouge0.571_Meteor0.247_TOTAL_1.558_BEST 2>&1' 14 | 15 | frontend: 16 | build: 17 | context: . 18 | dockerfile: frontend.Dockerfile 19 | image: scopeinfinity/video2description:frontend 20 | ports: 21 | - "8080:5000" 22 | environment: 23 | V2D_CONFIG_FILE: config_docker.json 24 | networks: 25 | internal: 26 | ipv4_address: 172.14.0.3 27 | volumes: 28 | - uploads:/home/si/v2d/uploads/ 29 | - ./src:/home/si/v2d/src/:ro 30 | entrypoint: /bin/bash -c 'python -m frontend.app 2>&1' 31 | 32 | volumes: 33 | uploads: 34 | 35 | networks: 36 | internal: 37 | ipam: 38 | driver: default 39 | config: 40 | - subnet: "172.14.0.0/24" -------------------------------------------------------------------------------- /docker_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | remote="scopeinfinity/video2description" 3 | for file_tag in "backend.Dockerfile ffmpeg_builder" "backend.Dockerfile glove_builder" "backend.Dockerfile deploy" "frontend.Dockerfile frontend"; do 4 | set -- $file_tag 5 | docker build --target $2 -t $remote:$2 --cache-from $remote:$2 --build-arg BUILDKIT_INLINE_CACHE=1 -f $1 .
6 | done 7 | -------------------------------------------------------------------------------- /docker_hub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker $1 scopeinfinity/video2description:frontend 3 | docker $1 scopeinfinity/video2description:ffmpeg_builder 4 | docker $1 scopeinfinity/video2description:glove_builder 5 | docker $1 scopeinfinity/video2description:deploy -------------------------------------------------------------------------------- /docker_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | trap 'kill $(jobs -p) || echo "No background jobs"' EXIT 5 | 6 | TIMEOUT_WAIT_FOR_BACKEND=${1:-5} # in minutes 7 | 8 | echo "[docker][backend] ./run_tests.sh" 9 | docker container run scopeinfinity/video2description:deploy conda run -n V2D /bin/bash -c 'cd /home/si/v2d/src/ && ./run_tests.sh' 10 | 11 | docker-compose up --detach 12 | docker-compose logs -f & 13 | 14 | for x in `seq ${TIMEOUT_WAIT_FOR_BACKEND}`;do 15 | sleep "1m"; 16 | curl "http://localhost:8080/model_weights_status" 2>&1 | tee /dev/stderr | grep -q '\[SUCCESS\]' && break; 17 | done 2>&1 || { echo "Backend model_weights_status failed to reach SUCCESS"; exit 1; } 18 | echo "Backend model_weights_status: SUCCESS" 19 | 20 | 21 | # Run tests external to docker 22 | echo "[external] Executing tests on [docker][deploy]" 23 | python -m unittest discover tests/ 24 | 25 | docker-compose down -------------------------------------------------------------------------------- /early_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Execute light tests that can run before setting up the environment, to save time and resources.
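# Usage sketch (as wired into the 'Install dependencies' step of
# .github/workflows/ci.yml, before the Docker images are built):
#   bash early_tests.sh
# run from the repository root, assuming a suitable Python is on PATH.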
4 | cd src/ 5 | python -m unittest tests.env.test_config -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: V2D 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - pip 7 | - python=2.7 8 | - librosa 9 | - pip: 10 | - six 11 | - opencv-python-headless==4.1.1.26 12 | - numpy 13 | - flask 14 | - matplotlib 15 | - pylint 16 | - h5py<3.0.0 17 | - gtk2 18 | - urllib3 19 | - waitress 20 | - keras==2.0.8 21 | - tensorflow==1.2.1 22 | -------------------------------------------------------------------------------- /f5c22f7_images/10802.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/10802.gif -------------------------------------------------------------------------------- /f5c22f7_images/12501.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12501.gif -------------------------------------------------------------------------------- /f5c22f7_images/12589.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12589.gif -------------------------------------------------------------------------------- /f5c22f7_images/12683.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12683.gif -------------------------------------------------------------------------------- /f5c22f7_images/12727.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12727.gif -------------------------------------------------------------------------------- /f5c22f7_images/12901.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12901.gif -------------------------------------------------------------------------------- /f5c22f7_images/12908.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12908.gif -------------------------------------------------------------------------------- /f5c22f7_images/12937.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12937.gif -------------------------------------------------------------------------------- /f5c22f7_images/12939.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12939.gif 
-------------------------------------------------------------------------------- /f5c22f7_images/12966.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12966.gif -------------------------------------------------------------------------------- /f5c22f7_images/12968.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12968.gif -------------------------------------------------------------------------------- /f5c22f7_images/12994.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/f5c22f7_images/12994.gif -------------------------------------------------------------------------------- /frontend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2 as frontend 2 | RUN pip install enum34 flask waitress 3 | 4 | RUN useradd -m -s /bin/bash si 5 | RUN mkdir -p /home/si/v2d/uploads 6 | RUN chown si:si /home/si/v2d/uploads 7 | USER si 8 | 9 | # Prepare basic files 10 | ENV V2D_CONFIG_FILE=config_docker.json 11 | RUN mkdir -p /tmp/v2d/app/uploads 12 | COPY --chown=si:si src/frontend /home/si/v2d/src/frontend/ 13 | COPY --chown=si:si src/common /home/si/v2d/src/common/ 14 | COPY --chown=si:si src/*.json /home/si/v2d/src/ 15 | COPY --chown=si:si src/__init__.py /home/si/v2d/src/__init__.py 16 | WORKDIR /home/si/v2d/src -------------------------------------------------------------------------------- /images/attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/attention.png -------------------------------------------------------------------------------- /images/model_audio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/model_audio.png -------------------------------------------------------------------------------- /images/model_video.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/model_video.png -------------------------------------------------------------------------------- /images/model_word.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/model_word.png -------------------------------------------------------------------------------- /images/sentence_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/images/sentence_model.png -------------------------------------------------------------------------------- /keep_alive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Keep streaming something every 
minute, for up to X minutes or until the job completes, whichever comes first. 3 | TIMEOUT="${1:?}" 4 | EXEC="${2:?}" 5 | shift 2 6 | timeout "${TIMEOUT}m" bash -c 'while true;do echo "Time: $(date)"; sleep 1m;done;' & 7 | TIMER_PID="$!" 8 | (timeout "${TIMEOUT}m" $EXEC "$@";kill $TIMER_PID) 9 | echo "Exiting keep alive" -------------------------------------------------------------------------------- /models/README.md: -------------------------------------------------------------------------------- 1 | Store trained models here -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | parameterized 2 | selenium -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/__init__.py -------------------------------------------------------------------------------- /src/backend/CombinedResults/calculate_score_results.sh: -------------------------------------------------------------------------------- 1 | ls result*.txt | xargs -I {} sh -c '[ ! -f eval_{} ] && python ../../cococaption/cocoeval.py {} | tail -n 1 > eval_{}' 2 | -------------------------------------------------------------------------------- /src/backend/CombinedResults/calculate_total_score_json.py: -------------------------------------------------------------------------------- 1 | import ast 2 | a = ast.literal_eval(raw_input().strip()) 3 | z=0 4 | for x in a.keys(): 5 | if x[-1] == '3' or x[-1] == '2' or x[-1] == '1': 6 | continue 7 | z+=a[x] 8 | print(z) 9 | -------------------------------------------------------------------------------- /src/backend/CombinedResults/find_total_sentences_unique.py: -------------------------------------------------------------------------------- 1 | import json, sys 2 | z = json.load(open(sys.argv[1])) 3 | sentences = [x['caption'] for x in z['predicted']] 4 | print("%d Unique sentences out of %d"%(len(set(sentences)),len(sentences))) 5 | -------------------------------------------------------------------------------- /src/backend/CombinedResults/summary.sh: -------------------------------------------------------------------------------- 1 | ls result_* | xargs -I {} sh -c 'echo {}; [ -f eval_{} ] && cat eval_{} &&cat eval_{} | python calculate_total_score_json.py;python find_total_sentences_unique.py {};echo ""' 2 | -------------------------------------------------------------------------------- /src/backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/backend/__init__.py -------------------------------------------------------------------------------- /src/backend/data.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import json 3 | import os 4 | import re 5 | 6 | from pytube import YouTube 7 | from subprocess import check_output 8 | 9 | DIR = 'Videos' 10 | CATEGORIES = (1<<3) 11 | 12 | with open('train_2017/videodatainfo_2017.json') as f: 13 | vdi = json.loads(f.read()) 14 | _videos={} 15 | for v in vdi['videos']: 16 | if ((1<<v['category']) & CATEGORIES) > 0: # category bitmask filter; reconstructed from a garbled line, v['category'] is an assumption 17 | _videos[v['video_id']] = { 'url' : v['url'] } 18 | for s in vdi['sentences']: 19 | if s['video_id'] in _videos.keys(): 20 |
_videos[s['video_id']]['caption'] = s['caption'] 21 | 22 | def download_all(): 23 | count = 0 24 | for _id in _videos.keys(): 25 | print("Downloading %s " % _id) 26 | getVideoFname(_id) 27 | count+=1 28 | print("%3.2f %% Completed" % (100.0*count/len(_videos.keys()))) 29 | 30 | def sz_videos(): 31 | return len(_videos) 32 | 33 | def get_videoId(index): 34 | v = _videos.keys()[index] 35 | return v 36 | 37 | def getVideoFname(videoId): 38 | try: 39 | fname = DIR+"/"+videoId+".mp4" 40 | # Caching 41 | if os.path.isfile(fname): 42 | print("Used cached video file %s " % fname) 43 | return fname 44 | url = _videos[videoId]['url'] 45 | print("Fetching info from %s " % url) 46 | yt = YouTube(url) 47 | v = yt.filter('mp4')[0] 48 | # For Non mp4, NOT SUPPORTED for now 49 | # if v is None: 50 | # v = yt.videos()[0] 51 | dfname = DIR+"/"+v.filename+".mp4" 52 | if v: 53 | print("Video Downloading %s " % videoId) 54 | v.download(DIR) 55 | print("Moving %s to %s " % (dfname,fname)) 56 | os.rename(dfname,fname) 57 | print("Video Downloaded") 58 | return fname 59 | else: 60 | print("Video not Found for %s " % videoId) 61 | return None 62 | except Exception as e: 63 | print(str(e)) 64 | return None 65 | 66 | 67 | def getCaption(videoId): 68 | return _videos[videoId]['caption'] 69 | 70 | def getDuration(fname): 71 | return int(float(os.popen("ffprobe -i %s -show_format 2>&1 | grep duration | sed 's/duration=//'" % (fname,)).read())) 72 | 73 | def getFrame(fname,ts): 74 | iname = 'Videos/frames/frame.png' 75 | hr = ts//3600 76 | ts = ts%(3600) 77 | mi = ts//60 78 | ts = ts%60 79 | time = "%02d:%02d:%02d" % (hr,mi,ts) 80 | print("getting frame for time %s " % time) 81 | os.popen("ffmpeg -y -ss %s -i %s -frames 1 %s" % (time,fname,iname)) 82 | img = cv2.imread(iname) 83 | return img 84 | 85 | def getVideo(videoId): 86 | fname = getVideoFname(videoId) 87 | if fname is None: 88 | return None 89 | print("Loading Video %s " % fname) 90 | duration = getDuration(fname) 91 | print("Duration " + str(duration) +" sec") 92 | COUNT = 5 93 | if duration < 15*COUNT: 94 | print("Video too short") 95 | return None 96 | frames = [] 97 | for i in range(COUNT): 98 | image = getFrame(fname,15*(i+1)) 99 | frames.append(image) 100 | return frames 101 | 102 | 103 | -------------------------------------------------------------------------------- /src/backend/framework.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import csv 3 | import json 4 | import numpy as np 5 | import os 6 | import shutil 7 | import sys 8 | 9 | from keras import callbacks 10 | from pprint import pformat 11 | from random import shuffle 12 | 13 | from backend.model import VModel 14 | from backend.vpreprocess import Preprocessor 15 | from common.config import get_app_config, get_vpreprocess_config 16 | from common.logger import logger 17 | from common.status import ModelWeightsStatus 18 | 19 | 20 | WORKERS = 40 21 | DATASET_CACHE = get_app_config()["DATASET_CACHE"] 22 | COCOFNAME = get_vpreprocess_config()["COCOFNAME"] 23 | 24 | CLABEL = 'ResNet_D512L512_G128G64_D1024D0.20BN_BDLSTM1024_D0.2L1024DVS' 25 | 26 | state_uninit = {'epochs':5000, 'start_batch':0, 'batch_size':100, 'saveAtBatch':500, 'steps_per_epoch':500} 27 | 28 | MFNAME = DATASET_CACHE+'/'+CLABEL+'_model.dat' 29 | _MFNAME = DATASET_CACHE+'/'+CLABEL+'_model.dat.bak' 30 | STATE = DATASET_CACHE+'/'+CLABEL+'_state.txt' 31 | RESULTS = DATASET_CACHE+'/'+CLABEL+'_results.txt' 32 | FRESTART = DATASET_CACHE+'/restart' 33 | PREDICT_BATCHSIZE = 200 34 | 35 |
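# Descriptive notes on the constants above: MFNAME is the live weights
# checkpoint and _MFNAME the temporary '.bak' file that save() writes before
# copying it into place; STATE holds the JSON-serialized training counters
# (seeded from state_uninit); RESULTS and FRESTART appear to be an auxiliary
# results file and a restart marker, judging by their names.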
class TrainingLogs: 36 | def __init__(self, prefix=""): 37 | self.epochLogHistory = [] 38 | self.fname = DATASET_CACHE+'/'+CLABEL + "_logs_" + prefix + ".txt" 39 | 40 | def flush(self): 41 | if not os.path.exists(self.fname): 42 | with open(self.fname, "w") as f: 43 | wr = csv.writer(f) 44 | if len(self.epochLogHistory) > 0: 45 | with open(self.fname, "a") as f: 46 | wr = csv.writer(f) 47 | for h in self.epochLogHistory: 48 | wr.writerow(h) 49 | self.epochLogHistory = [] 50 | logger.debug("Training Logs flushed") 51 | 52 | def add(self,cont): 53 | MXCol = 15 54 | dat = [-1] * 15 55 | for i in range(min(MXCol,len(cont))): 56 | dat[i]=cont[i] 57 | self.epochLogHistory.append(dat) 58 | 59 | class ModelGeneratorCallback(callbacks.Callback): 60 | 61 | def __init__(self, state, tlogs, elogs, framework): 62 | self.state = state 63 | self.lastloss = float('inf') 64 | self.tlogs = tlogs 65 | self.elogs = elogs 66 | self.last_epochmodel = None 67 | self.framework = framework 68 | self.batchTrainedCounter = 0 69 | self.bestlossepoch = float('inf') 70 | 71 | def on_epoch_end(self, epoch, logs={}): 72 | logger.debug("Epoch %d End " % epoch) 73 | self.state['epochs']-=1 74 | loss = logs['loss'] 75 | acc = logs['acc'] 76 | valloss = logs['val_loss'] 77 | valacc = logs['val_acc'] 78 | # Sample Content 79 | # {'CIDEr': 0.11325126353463148, 'Bleu_4': 0.1706107390467726, 'Bleu_3': 0.27462591349020055, 'Bleu_2': 0.4157995334621001, 'Bleu_1': 0.6064295446876932, 'ROUGE_L': 0.40471970665189977, 'METEOR': 0.17162570735633326} 80 | coco_json = self.framework.eval_onvalidation() 81 | cider = coco_json['CIDEr'] 82 | bleu4 = coco_json['Bleu_4'] 83 | rouge = coco_json['ROUGE_L'] 84 | meteor = coco_json['METEOR'] 85 | ename = "%.3f_Cider%.3f_Blue%.3f_Rouge%.3f_Meteor%.3f" % (valloss, cider, bleu4, rouge, meteor) 86 | self.elogs.add([epoch,loss, acc, valloss, valacc, cider, bleu4, rouge, meteor]) 87 | self.elogs.flush() 88 | if valloss < self.bestlossepoch or True: 89 | to_rm = self.last_epochmodel 90 | self.last_epochmodel = self.framework.save(epoch=("%03d_loss_%s" % (self.state['epochs'],ename))) 91 | self.bestlossepoch = valloss 92 | if to_rm is not None: 93 | pass 94 | # os.remove(to_rm) 95 | return 96 | 97 | def on_batch_end(self, batch, logs={}): 98 | logger.debug("Batch %d ends" % batch) 99 | valloss = -1 100 | valacc = -1 101 | loss = logs['loss'] 102 | acc = logs['acc'] 103 | self.lastloss = loss 104 | print("Keys Logger %s " % str(logs.keys())) 105 | self.tlogs.add([batch, loss, acc, valloss, valacc]) 106 | self.state['start_batch'] += 1 107 | self.batchTrainedCounter += 1 108 | logger.debug("Batches Trained : %d" % self.batchTrainedCounter) 109 | if self.batchTrainedCounter % self.state['saveAtBatch'] == 0: 110 | logger.debug("Preparing To Save") 111 | self.framework.save() 112 | self.tlogs.flush() 113 | 114 | 115 | class Framework(): 116 | 117 | def __init__(self, model_load = MFNAME, train_mode = False): 118 | self.mode_learning = train_mode 119 | self.state = state_uninit 120 | self.file_model = model_load 121 | self.status_model_weights = ModelWeightsStatus.NO_INFO 122 | self.tlogs = TrainingLogs() 123 | self.elogs = TrainingLogs(prefix = "epoch_") 124 | self.model = None # Init in self.build_model() 125 | self.preprocess = Preprocessor() 126 | self.build_model() 127 | self.load() 128 | logger.debug("__init__ framework complete") 129 | 130 | def build_model(self): 131 | vocab = self.preprocess.vocab 132 | self.vmodel = VModel(vocab.CAPTION_LEN, vocab.VOCAB_SIZE, learning = self.mode_learning) 133 | 
self.model = self.vmodel.get_model() 134 | assert self.preprocess is not None 135 | self.preprocess.set_vmodel(self.vmodel) 136 | 137 | def load(self): 138 | logger.debug("Model Path: %s" % self.file_model) 139 | if os.path.exists(self.file_model): 140 | self.model.load_weights(self.file_model) 141 | self.status_model_weights = ModelWeightsStatus.SUCCESS 142 | logger.debug("Weights Loaded") 143 | else: 144 | self.status_model_weights = ModelWeightsStatus.MODEL_NOT_FOUND 145 | logger.warning("Weights file not found.") 146 | if os.path.exists(STATE): 147 | with open(STATE) as f: 148 | self.state = json.load(f) 149 | logger.debug("State Loaded") 150 | 151 | def get_weights_status(self): 152 | return str(self.status_model_weights) 153 | 154 | def save(self, epoch='xx'): 155 | try: 156 | pass 157 | finally: 158 | tname = _MFNAME 159 | self.model.save_weights(tname) 160 | fname = self.file_model 161 | if epoch != 'xx': 162 | fname = self.file_model + '_' + epoch 163 | shutil.copy2(tname,fname) 164 | os.remove(tname) 165 | logger.debug("Weights Saved") 166 | with open(STATE,'w') as f: 167 | json.dump(self.state,f) 168 | logger.debug("State Saved") 169 | return fname 170 | return None 171 | 172 | def train_generator(self): 173 | epochs = self.state['epochs'] 174 | bs = self.state['batch_size'] 175 | steps_per_epoch = self.state['steps_per_epoch'] 176 | validation_steps = 1 177 | logger.debug("Epochs Left : %d " % epochs) 178 | logger.debug("Batch Size : %d " % bs) 179 | 180 | train_dg = self.preprocess.data_generator(bs, start=self.state['start_batch'], typeSet = 0) 181 | val_dg = self.preprocess.data_generator(bs, -1, typeSet = 1) 182 | logger.debug("Attempting to fit") 183 | callbacklist = [ModelGeneratorCallback(self.state, self.tlogs, self.elogs, self)] 184 | self.vmodel.train_mode() 185 | self.model.fit_generator(train_dg, steps_per_epoch=steps_per_epoch, epochs=epochs, 186 | verbose=1,validation_data=val_dg, validation_steps=validation_steps, 187 | initial_epoch=0, callbacks=callbacklist, 188 | workers=WORKERS, use_multiprocessing=True) 189 | 190 | def predict_model_direct(self, fnames, cache_ids = None): 191 | videoVecs = [] 192 | audioVecs = [] 193 | for i in range(len(fnames)): 194 | cid = None 195 | if cache_ids is not None: 196 | cid = cache_ids[i] 197 | vid_audio = self.preprocess.get_video_content(fnames[i], cache_id = cid) 198 | if vid_audio is None: 199 | return None,{'error':'Video %d couldn\'t be loaded. 
%s ' % (i, fnames[i])} 200 | videoVecs.append(vid_audio[0]) # Video Features 201 | audioVecs.append(vid_audio[1]) # Audio Features 202 | videoVecs = np.array(videoVecs) 203 | audioVecs = np.array(audioVecs) 204 | 205 | # videoVecs =np.array([self.preprocess.get_video_content(f) for f in fnames]) 206 | count = len(fnames) 207 | logger.debug("Predicting for Videos :- \n\t%s " % fnames) 208 | l = 0 209 | vocab = self.preprocess.vocab 210 | startCapRow = [vocab.wordEmbedding[vocab.specialWords['START']] ] 211 | startCapRow.extend([ vocab.wordEmbedding[vocab.specialWords['NONE']] ] * vocab.CAPTION_LEN) 212 | 213 | embeddedCap = np.array([ startCapRow ] * count) 214 | logger.debug("Shape of Caption : %s", str(np.shape(embeddedCap))) 215 | stringCaption = [] 216 | for i in range(count): 217 | stringCaption.append([]) 218 | while l < vocab.CAPTION_LEN: 219 | newOneHotCap = self.model.predict([embeddedCap, audioVecs, videoVecs]) 220 | print("Shape of out Predict Model : %s " % str(np.shape(newOneHotCap))) 221 | for i,newOneHotWord in enumerate(newOneHotCap): 222 | nword = vocab.word_fromonehot(newOneHotWord[l]) 223 | # print(str(i)+" "+str(l)+" "+nword) 224 | stringCaption[i].append( nword ) 225 | if l + 1 != vocab.CAPTION_LEN: 226 | embeddedCap[i][l+1] = vocab.wordEmbedding[nword] 227 | 228 | print([' '.join(cap) for cap in stringCaption]) 229 | l += 1 230 | logger.debug("Prediction Complete") 231 | captionObject = [] 232 | for i,cap in enumerate(stringCaption): 233 | captionObject.append({'fname':fnames[i], 'caption':cap}) 234 | return stringCaption, captionObject 235 | 236 | def predict_ids(self, _ids): 237 | logger.debug("Trying to predict for %s" % (_ids,)) 238 | result = self.predict_model(_ids = _ids) 239 | return result 240 | 241 | def predict_fnames(self, fnames): 242 | logger.debug("Trying to predict for %s" % (fnames,)) 243 | result = self.predict_model(fnames = fnames) 244 | return result 245 | 246 | def predict_model(self, _ids = None, fnames = None): 247 | assert (_ids is None) ^ (fnames is None) 248 | vHandler = self.preprocess.vHandler 249 | if fnames is None: 250 | fnames = [] 251 | for _id in _ids: 252 | logger.debug("Obtaining fname for %d" % _id) 253 | fname = vHandler.downloadVideo(_id) 254 | if fname is None: 255 | logger.info("Ignoring %d video " % _id) 256 | else: 257 | fnames.append(fname) 258 | 259 | batch_size = PREDICT_BATCHSIZE 260 | batch_count = (len(fnames)+batch_size-1)//batch_size 261 | predictions,output = ([],[]) 262 | for i in range(batch_count): 263 | cids = None 264 | if _ids is not None: 265 | cids = _ids[i*batch_size:(i+1)*batch_size] 266 | pred,out = self.predict_model_direct(fnames[i*batch_size:(i+1)*batch_size], cache_ids = cids) 267 | if pred is None: 268 | logger.debug(json.dumps(out)) 269 | assert False 270 | predictions.extend(pred) 271 | output.extend(out) 272 | results = [] 273 | for i in range(len(fnames)): 274 | logger.debug("For eog %s" % fnames[i]) 275 | predictedCaption = ' '.join(predictions[i]) 276 | logger.debug("Predicted Caption : %s" % predictedCaption ) 277 | actualCaption = None 278 | if _ids is not None: 279 | actualCaption = vHandler.getCaptionData()[_ids[i]] 280 | logger.debug("Actual Captions - \n%s" % pformat(actualCaption) ) 281 | res = dict() 282 | res['fname'] = fnames[i] 283 | res['output'] = predictedCaption 284 | res['actual'] = actualCaption 285 | results.append(res) 286 | return json.dumps(results, indent=4, sort_keys=True) 287 | 288 | def isVideoExtension(self, fname): 289 | for ext in ['mp4','jpeg','png']: 290 | if 
fname.endswith('.'+ext): 291 | return True 292 | return False 293 | 294 | def predict_test(self, dirpath, mxc): 295 | videos = ["%s/%s" % (dirpath,vid) for vid in os.listdir(dirpath) if self.isVideoExtension(vid)][0:mxc] 296 | self.predict_model(fnames = videos) 297 | 298 | def clean_caption(self, msg): 299 | if '<' in msg: 300 | return msg.split("<")[0] 301 | return msg 302 | 303 | def save_all(self, _ids, save = RESULTS): 304 | _result = json.loads(self.predict_ids(_ids)) 305 | test_predicted = [] 306 | test_actual = [] 307 | for res in _result: 308 | tp = dict() 309 | _id = int(res['fname'].split('/')[-1].split('.')[0]) 310 | tp['video_id'] = _id 311 | tp['caption'] = self.clean_caption(res['output']) 312 | test_predicted.append(tp) 313 | 314 | for cap in res['actual']: 315 | tp_actual = dict() 316 | tp_actual['video_id'] = _id 317 | tp_actual['caption'] = cap 318 | test_actual.append(tp_actual) 319 | result = dict() 320 | result['predicted'] = test_predicted 321 | result['actual'] = test_actual 322 | with open(save, 'w') as f: 323 | f.write(json.dumps(result)) 324 | logger.debug("Result Saved") 325 | 326 | def eval_onvalidation(self): 327 | fname = '/tmp/save_model_' + CLABEL 328 | logger.debug("Calculating cocoscore") 329 | valids = self.preprocess.vHandler.getValidationIds() 330 | self.save_all(valids, save = fname) 331 | cmd = "python %s %s | tail -n 1" % (COCOFNAME, fname) 332 | coco = ast.literal_eval(os.popen(cmd).read().strip()) 333 | logger.debug("Done") 334 | logger.debug("Coco Scores :%s\n" % json.dumps(coco,indent=4, sort_keys=True)) 335 | return coco 336 | 337 | def get_testids(self, count = -1): 338 | ids = self.preprocess.vHandler.getTestIds() 339 | if count == -1: 340 | count = len(ids) 341 | else: 342 | shuffle(ids) 343 | return ids[:count] 344 | 345 | def get_valids(self, count = -1): 346 | ids = self.preprocess.vHandler.getValidationIds() 347 | if count == -1: 348 | count = len(ids) 349 | else: 350 | shuffle(ids) 351 | return ids[:count] 352 | 353 | def get_trainids(self, count = -1): 354 | ids = self.preprocess.vHandler.getTrainingIds() 355 | if count == -1: 356 | count = len(ids) 357 | else: 358 | shuffle(ids) 359 | return ids[:count] 360 | -------------------------------------------------------------------------------- /src/backend/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import sys 4 | 5 | from keras.applications import ResNet50, VGG16 6 | from keras.applications.inception_v3 import InceptionV3 7 | from keras.applications.resnet50 import preprocess_input 8 | from keras.layers import Dropout, Merge, Flatten, RepeatVector, Activation 9 | from keras.layers import Embedding, Conv2D, MaxPooling2D, LSTM, GRU, BatchNormalization 10 | from keras.layers import TimeDistributed, Dense, Input, Flatten, GlobalAveragePooling2D, Bidirectional 11 | from keras.models import Model 12 | from keras.models import Sequential 13 | from keras.optimizers import RMSprop 14 | from keras.preprocessing import image 15 | from keras.regularizers import l2 16 | import keras.backend as K 17 | import tensorflow as tf 18 | 19 | from backend.vocab import Vocab 20 | from common.logger import logger 21 | 22 | def sentence_distance(y_true, y_pred): 23 | return K.sqrt(K.sum(K.square(K.abs(y_true-y_pred)),axis=1,keepdims=True)) 24 | 25 | class VModel: 26 | 27 | def __init__(self, CAPTION_LEN, VOCAB_SIZE, cutoffonly = False, learning = True): 28 | self.CAPTION_LEN = CAPTION_LEN 29 | self.VOCAB_SIZE = VOCAB_SIZE 30 | 
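# With cutoffonly=True only the frozen feature-extractor CNN below is
# built; the full captioning network (build_mcnn) is skipped.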
if not cutoffonly: 31 | self.build_mcnn(self.CAPTION_LEN, self.VOCAB_SIZE, learning = learning) 32 | self.build_cutoffmodel() 33 | 34 | def get_model(self): 35 | return self.model 36 | 37 | ''' 38 | Attempt to split pretrained CNN out of model 39 | To cache a lower dimension vector per frame to file 40 | # PC : pretrained CNN will be non-trainable now 41 | ''' 42 | def build_cutoffmodel(self): 43 | base = ResNet50(include_top = False, weights='imagenet') 44 | # base = InceptionV3(include_top = False, weights='imagenet') 45 | self.co_model = base 46 | logger.debug("Building Cutoff Model") 47 | self.co_model.summary() 48 | self.co_model._make_predict_function() 49 | self.graph = tf.get_default_graph() 50 | logger.debug("Building Cutoff Model : Completed") 51 | return self.co_model 52 | 53 | # co == Cutoff Model 54 | def co_getoutshape(self, assert_model = None): 55 | # ResNet 56 | shape = (None,2048) 57 | ## Inception V3 58 | # shape = (None, 8*8*2048) 59 | logger.debug("Model Cutoff OutShape : %s" % str(shape)) 60 | ''' 61 | # Not in use 62 | if assert_model is not None: 63 | ashape = assert_model.output_shape 64 | sz = 1 65 | for x in ashape: 66 | if x is not None: 67 | sz = sz * x 68 | ashape = (None, sz) 69 | logger.debug("Assert Model Cutoff OutShape : %s" % str(ashape)) 70 | assert shape == ashape 71 | ''' 72 | assert len(shape) == 2 73 | assert shape[0] is None 74 | return shape 75 | 76 | def preprocess_partialmodel(self, frames): 77 | frames_in = np.asarray([image.img_to_array(frame) for frame in frames]) 78 | frames_in = preprocess_input(frames_in) 79 | with self.graph.as_default(): 80 | frames_out = self.co_model.predict(frames_in) 81 | frames_out = np.array([frame.flatten() for frame in frames_out]) 82 | return frames_out 83 | 84 | def train_mode(self): 85 | K.set_learning_phase(1) 86 | 87 | def build_mcnn(self, CAPTION_LEN, VOCAB_SIZE, learning = True): 88 | if learning: 89 | self.train_mode() 90 | from backend.videohandler import VideoHandler 91 | logger.debug("Creating Model (CNN Cutoff) with Vocab Size : %d " % VOCAB_SIZE) 92 | cmodel = Sequential() 93 | cmodel.add(TimeDistributed(Dense(512,kernel_initializer='random_normal'), input_shape=(CAPTION_LEN+1,Vocab.OUTDIM_EMB ))) 94 | cmodel.add(LSTM(512, return_sequences=True,kernel_initializer='random_normal')) 95 | cmodel.summary() 96 | 97 | input_shape_audio = VideoHandler.AUDIO_FEATURE 98 | amodel = Sequential() 99 | amodel.add(GRU(128, 100 | dropout=0.2, 101 | recurrent_dropout=0.2, 102 | return_sequences=True, 103 | input_shape=input_shape_audio)) 104 | amodel.add(BatchNormalization()) 105 | amodel.add(GRU(64, 106 | dropout=0.2, 107 | recurrent_dropout=0.2, 108 | return_sequences=True)) 109 | amodel.add(BatchNormalization()) 110 | amodel.add(Flatten()) 111 | amodel.add(RepeatVector(CAPTION_LEN + 1)) 112 | amodel.summary() 113 | 114 | input_shape_vid = self.co_getoutshape() 115 | imodel = Sequential() 116 | imodel.add(TimeDistributed(Dense(1024,kernel_initializer='random_normal'), input_shape=input_shape_vid)) 117 | imodel.add(TimeDistributed(Dropout(0.20))) 118 | imodel.add(TimeDistributed(BatchNormalization(axis=-1))) 119 | imodel.add(Activation('tanh')) 120 | imodel.add(Bidirectional(GRU(1024, return_sequences=False, kernel_initializer='random_normal'))) 121 | imodel.add(RepeatVector(CAPTION_LEN + 1)) 122 | 123 | imodel.summary() 124 | 125 | model = Sequential() 126 | model.add(Merge([cmodel,amodel,imodel],mode='concat')) 127 | model.add(TimeDistributed(Dropout(0.2))) 128 | model.add(LSTM(1024,return_sequences=True, 
kernel_initializer='random_normal',recurrent_regularizer=l2(0.01))) 129 | model.add(TimeDistributed(Dense(VOCAB_SIZE,kernel_initializer='random_normal'))) 130 | model.add(Activation('softmax')) 131 | optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, decay=0) 132 | model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) 133 | model.summary() 134 | logger.debug("Model Created ResNet_D512L512_G128G64_D1024D0.25BN_BDGRU1024_D0.2L1024DVS") 135 | self.model = model 136 | return model 137 | 138 | def plot_model(self, filename): 139 | from keras.utils import plot_model 140 | plot_model(self.model, to_file=filename, show_shapes = True, show_layer_names = False) 141 | print("Model Plotted in %s"%filename) 142 | 143 | if __name__ == "__main__": 144 | if sys.argv[1] == "plot_model": 145 | from vocab import Vocab 146 | vmodel = VModel(Vocab.CAPTION_LEN, Vocab.VOCAB_SIZE) 147 | vmodel.plot_model(sys.argv[2]) 148 | -------------------------------------------------------------------------------- /src/backend/parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | from common.logger import logger 5 | from common.rpc import register_server, get_rpc, PORT 6 | 7 | class Parser: 8 | def __init__(self): 9 | pass 10 | 11 | def init_framework(self, model_fname = None, train_mode = False): 12 | if not hasattr(self,'framework'): 13 | from backend.framework import Framework 14 | if model_fname is not None: 15 | self.framework = Framework(model_load = model_fname, train_mode = train_mode) 16 | else: 17 | self.framework = Framework(train_mode = train_mode) 18 | 19 | def parse(self): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('command', choices=['train','predict','server','predict_all_model']) 22 | args = parser.parse_args(sys.argv[1:2]) 23 | if args.command == 'train': 24 | self.train() 25 | if args.command == 'predict': 26 | self.predict() 27 | if args.command == 'server': 28 | self.server() 29 | if args.command == 'predict_all_model': 30 | self.predict_all_model() 31 | print(args.command) 32 | 33 | def train(self): 34 | logger.debug("Training Mode") 35 | self.init_framework(train_mode = True) 36 | self.framework.train_generator() 37 | 38 | def predict_all_model(self): 39 | import glob, os 40 | from backend.framework import Framework, MFNAME 41 | 42 | logger.debug("PredictAllModel Mode") 43 | result_dir = 'CombinedResults' 44 | os.system('mkdir -p %s' % result_dir) 45 | for fname in glob.glob(MFNAME+"_*"): 46 | save_file = result_dir + "/result_"+os.path.basename(fname)+"_.txt" 47 | if os.path.exists(save_file): 48 | continue 49 | logger.debug("Working on model %s " % fname) 50 | self.framework = Framework(model_load = fname) 51 | self.framework.save_all(_ids = self.framework.get_testids(), save = save_file) 52 | logger.debug("Done") 53 | 54 | def predict(self): 55 | parser = argparse.ArgumentParser(prog = sys.argv[0]+" predict", description = 'Prediction Mode') 56 | parser.add_argument('dataset', choices=['train','test','save_all_test'], help='Video dataset for prediction') 57 | parser.add_argument('-c', '--count', type = int, default = 10) 58 | args = parser.parse_args(sys.argv[2:]) 59 | 60 | logger.debug("Prediction Mode") 61 | self.init_framework() 62 | if args.dataset == 'train': 63 | _ids = self.framework.get_trainids(args.count) 64 | elif args.dataset == 'test': 65 | _ids = self.framework.get_testids(args.count) 66 | elif args.dataset == 'save_all_test': 67 | 
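# save_all_test ignores --count: it predicts for every downloaded test id
# and writes the results to the RESULTS file (see Framework.save_all).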
self.framework.save_all(_ids = self.framework.get_testids()) 68 | return 69 | else: 70 | assert False 71 | self.framework.predict_model(_ids = _ids) 72 | 73 | def server(self): 74 | logger.debug("Server Mode") 75 | parser = argparse.ArgumentParser(prog = sys.argv[0]+" server", description = 'Server Mode') 76 | parser.add_argument('-i', '--init-only', help='Prepares early caches for faster execution', action='store_true') 77 | parser.add_argument('-s', '--start', help='Start RPC Server', action='store_true') 78 | parser.add_argument('-m', '--model', help='Model file') 79 | parser.add_argument('-pids', '--predict_ids',type=int, help='Obtain Results for given IDs', nargs='+') 80 | parser.add_argument('-pfs', '--predict_fnames', help='Obtain Results for given files', nargs='+') 81 | parser.add_argument('-cf', '--close_framework', help='Close Server Framework', action='store_true') 82 | args = parser.parse_args(sys.argv[2:]) 83 | if args.init_only: 84 | self.init_framework() 85 | print("[RPC][Server][Init][Done]") 86 | elif args.start: 87 | model_fname = None 88 | if args.model: 89 | model_fname = args.model 90 | self.init_framework(model_fname) 91 | register_server(self.framework) 92 | elif args.predict_ids: 93 | proxy = get_rpc() 94 | result = proxy.predict_ids( args.predict_ids ) 95 | print(result) 96 | elif args.predict_fnames: 97 | proxy = get_rpc() 98 | result = proxy.predict_fnames( args.predict_fnames ) 99 | print(result) 100 | elif args.close_framework: 101 | proxy = get_rpc() 102 | proxy.close_framework() 103 | print("[RPC][Send][close_framework]") 104 | else: 105 | parser.print_help() 106 | 107 | if __name__ == "__main__": 108 | Parser().parse() 109 | -------------------------------------------------------------------------------- /src/backend/plotepochlog.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import sys 4 | 5 | MXPOINT = 100 6 | assert len(sys.argv)>=2 7 | fname=sys.argv[1] 8 | showtrain = True 9 | if len(sys.argv)>=3: 10 | showtrain = (sys.argv[2] == 'yo') 11 | showval = True 12 | if len(sys.argv)>=4: 13 | showval = (sys.argv[3] == 'yo') 14 | showepoch = True 15 | if len(sys.argv)>=5: 16 | showepoch = (sys.argv[4] == 'yo') 17 | 18 | print("Fname %s " % fname) 19 | 20 | batch = [] 21 | loss = [] 22 | acc = [] 23 | val_loss = [] 24 | val_acc = [] 25 | 26 | ndata = [] 27 | with open(fname,'r') as f: 28 | for row in f: 29 | rr =[float(x) for x in row.split(',')] 30 | ndata.append(rr) 31 | 32 | ndata = np.array(ndata, dtype='float') 33 | print(np.shape(ndata)) 34 | step = 1 35 | if len(ndata[0]) > MXPOINT: 36 | step = len(ndata[0]) // MXPOINT 37 | [batch, loss, acc, val_loss, val_acc,cider,bleu4,rouge,meteor] = [y[::step] for y in np.matrix.transpose(ndata)][:9] 38 | 39 | x = range(len(batch)) 40 | fig = plt.figure() 41 | host = fig.add_subplot(111) 42 | pscores = host.twinx() 43 | pacc = host.twinx() 44 | ploss = host.twinx() 45 | 46 | if showepoch: 47 | _b,=host.plot(x,batch,color= plt.cm.viridis(0.95),label='Batches') 48 | 49 | if showtrain: 50 | _a,=pacc.plot(x,acc,'-.',label="Accuracy",color= plt.cm.viridis(0)) 51 | _l,=ploss.plot(x,loss, '-', label="Loss", color = plt.cm.viridis(0)) 52 | if showval: 53 | ploss.plot(x,val_loss,'-', label="Val Loss",color= plt.cm.viridis(0.5)) 54 | pacc.plot(x,val_acc,'-.',label="Val Accuracy",color= plt.cm.viridis(0.5)) 55 | if showtrain or showval: 56 | ploss.legend(loc='lower right') 57 | pacc.legend(loc='lower left') 58 | 
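# Four y-axes share one x-axis: host tracks the batch counter, ploss and
# pacc carry the loss/accuracy curves, and pscores the COCO metrics below.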
ploss.spines['right'].set_position(('outward', 30)) 59 | 60 | score_total = cider+bleu4+rouge+meteor 61 | pscores.plot(x,cider,'-', label="Cider",color= plt.cm.viridis(0.0)) 62 | pscores.plot(x,bleu4,'-', label="Bleu4",color= plt.cm.viridis(0.2)) 63 | pscores.plot(x,rouge,'-', label="Rouge",color= plt.cm.viridis(0.4)) 64 | pscores.plot(x,meteor,'-', label="Meteor",color= plt.cm.viridis(0.6)) 65 | pscores.plot(x,score_total,'-', label="Total",color= plt.cm.viridis(0.8)) 66 | pscores.legend(loc='upper left') 67 | 68 | 69 | #host.yaxis.label.set_color(_b.get_color()) 70 | #ploss.yaxis.label.set_color(_l.get_color()) 71 | #pacc.yaxis.label.set_color(_a.get_color()) 72 | 73 | #plt.savefig("plot.png", bbox_inches='tight') 74 | 75 | best_iter = np.argmax(score_total) 76 | print("Best Iteration %d " % best_iter) 77 | print("\tCIDER %.4f " % cider[best_iter]) 78 | print("\tBLEU4 %.4f " % bleu4[best_iter]) 79 | print("\tROUGE %.4f " % rouge[best_iter]) 80 | print("\tMETEOR %.4f " % meteor[best_iter]) 81 | print("\tTotalScore %.4f " % score_total[best_iter]) 82 | 83 | 84 | 85 | plt.show() 86 | -------------------------------------------------------------------------------- /src/backend/pred.sh: -------------------------------------------------------------------------------- 1 | python framework.py -predict 882,527155,480723,267111,393362 2 | -------------------------------------------------------------------------------- /src/backend/preprocess.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import json 3 | import os 4 | import numpy as np 5 | import pickle 6 | import random 7 | import re 8 | import shutil 9 | import sys 10 | 11 | from sets import Set 12 | 13 | from keras import callbacks 14 | from keras.applications import imagenet_utils 15 | from keras.preprocessing import image 16 | from keras.preprocessing import sequence 17 | 18 | from common.logger import logger 19 | 20 | ROOT_DIR = '/home/gagan.cs14/btp' 21 | GITBRANCH = os.popen('git branch | grep "*"').read().split(" ")[1][:-1] 22 | GITBRANCHPREFIX = "/home/gagan.cs14/btp_"+GITBRANCH+"/" 23 | # Parameters 24 | CAPTION_LEN = 10 25 | MAX_WORDS = 400000 26 | OUTENCODINGGLOVE = False 27 | 28 | os.chdir(ROOT_DIR) 29 | BADLOGS = GITBRANCHPREFIX+"badlogs.txt" 30 | FILENAME_CAPTION = 'ImageDataset/annotations/captions_train2014.json' 31 | DIR_IMAGES = 'ImageDataset/train2014/' 32 | DIR_IMAGESP = 'ImageDataset/processed/' 33 | VOCAB_FILE = GITBRANCHPREFIX+"vocab.dat" 34 | GLOVE_FILE = 'glove/glove.6B.100d.txt' 35 | OUTDIM_EMB = 100 36 | USE_GLOVE = True 37 | WORD_MIN_FREQ = 5 38 | def get_image_fname(_id): 39 | return '%sCOCO_train2014_%012d.jpg' % (DIR_IMAGES, _id) 40 | 41 | 42 | vocab = Set([]) 43 | v_ind2word = {} 44 | v_word2ind = {} 45 | VOCAB_SIZE = [0] 46 | 47 | embeddingLen = None 48 | 49 | #embeddingMatrix = np.zeros((MAX_WORDS, 100)) 50 | #EMBEDDING_FILE = 'embedding' 51 | #embeddingMatrixRef = [ embeddingMatrix ] 52 | #################################################ADD GIT BRANCH 53 | ICAPPF = GITBRANCHPREFIX+'imcap.dat' 54 | embeddingIndex = {} 55 | EMBEDDINGI_FILE = GITBRANCHPREFIX+'embeddingIScaled5' 56 | EMBEDDING_OUT_SCALEFACT = 5 #(-4.0665998, 3.575) needs to be mapped to -1 to +1 57 | embeddingIndexRef = [ embeddingIndex ] 58 | 59 | 60 | def createDirs(): 61 | try: 62 | os.makedirs(GITBRANCHPREFIX) 63 | os.makedirs(ROOT_DIR + '/' + DIR_IMAGESP) 64 | except OSError: 65 | if not os.path.isdir(GITBRANCHPREFIX): 66 | raise 67 | 68 | def badLogs(msg): 69 | print(msg) 70 | 
with open(BADLOGS,"a") as f: 71 | f.write(msg) 72 | 73 | ''' 74 | def addToVocab(w): 75 | global VOCAB_SIZE 76 | vocab.add(w) 77 | v_ind2word[ VOCAB_SIZE ] = w 78 | v_word2ind[ w ] = VOCAB_SIZE 79 | if not isEmbeddingPresent: 80 | if w in embeddingIndex.keys(): 81 | embeddingMatrix[VOCAB_SIZE] = embeddingIndex[w] 82 | print(embeddingMatrix[VOCAB_SIZE]) 83 | if VOCAB_SIZE<10: 84 | print("%d : %s" % (VOCAB_SIZE, w)) 85 | VOCAB_SIZE += 1 86 | return VOCAB_SIZE-1 87 | ''' 88 | ''' 89 | Add NULL Word 90 | Add NonVocab Word 91 | ''' 92 | ENG_SOS = ">" 93 | ENG_EOS = "<" 94 | ENG_EXTRA = "___" 95 | ENG_NONE = "?!?" 96 | 97 | 98 | ''' 99 | def iniVocab(): 100 | global W_SOS,W_EOS 101 | #addToVocab("none") 102 | #addToVocab("extra") 103 | #W_SOS = addToVocab(ENG_SOS) 104 | #W_EOS = addToVocab(ENG_EOS) 105 | ''' 106 | 107 | 108 | def build_gloveVocab(): 109 | logger.debug("Started") 110 | if len(embeddingIndexRef[0].keys()) > 0: 111 | logger.debug("Embedding Already Present %d " % len(embeddingIndexRef[0].keys())) 112 | return 113 | isEmbeddingPresent = os.path.exists(EMBEDDINGI_FILE) 114 | print("Embedding Present %s " % isEmbeddingPresent) 115 | if isEmbeddingPresent: 116 | 117 | '''with open(EMBEDDING_FILE,'r') as f: 118 | global embeddingMatrix 119 | embeddingMatrix = pickle.load(f) 120 | embeddingMatrixRef[0] = embeddingMatrix 121 | ''' 122 | minVal = float('inf') 123 | maxVal = -minVal 124 | with open(EMBEDDINGI_FILE,'r') as f: 125 | global embeddingIndex 126 | embeddingIndex = pickle.load(f) 127 | embeddingIndexRef[0] = embeddingIndex 128 | for v in embeddingIndex.values(): 129 | for x in v: 130 | minVal = min(minVal,x) 131 | maxVal = max(maxVal,x) 132 | #print("minVal, maxVal %s " % str((minVal,maxVal))) 133 | #exit() 134 | print("Embedding Loaded") 135 | else: 136 | with open(GLOVE_FILE,'r') as f: 137 | for i,line in enumerate(f): 138 | tokens = line.split() 139 | #print(tokens) 140 | tokens = [tok.__str__() for tok in tokens] 141 | #print(tokens) 142 | #exit() 143 | #if i==200: 144 | # break 145 | 146 | word = tokens[0] 147 | #embeddingLen = len(tokens)-1 148 | if word == "none": 149 | print("YoFound you") 150 | if i<5: 151 | print(word) 152 | #print(tokens[1:]) 153 | embeddingIndex[word] = np.asarray(tokens[1:], dtype='float32') * (1.0/EMBEDDING_OUT_SCALEFACT) 154 | #print(embeddingIndex[word]) 155 | #exit() 156 | #exit() 157 | assert isEmbeddingPresent == False 158 | isEmbeddingPresent = True 159 | #with open(EMBEDDING_FILE,'w') as f: 160 | # pickle.dump(embeddingMatrix,f) 161 | with open(EMBEDDINGI_FILE,'w') as f: 162 | pickle.dump(embeddingIndex,f) 163 | print("Embedding Saved!") 164 | 165 | #iniVocab() 166 | logger.debug("Completed") 167 | 168 | 169 | ''' 170 | def op_on_caption(cap): 171 | for w in cap.split(' '): 172 | w = w.lower() 173 | if w not in vocab: 174 | v_word2ind[ w ] = addToVocab(w) 175 | ''' 176 | def build_image_caption_pair(): 177 | if os.path.exists(ICAPPF): 178 | with open(ICAPPF,'r') as f: 179 | x,mywords = pickle.load(f) 180 | print("Image Caption Pair Data Model Loaded") 181 | return x,mywords 182 | 183 | x = {} 184 | logger.debug("Started") 185 | wordFreq = {} 186 | uwords = set([]) 187 | with open(FILENAME_CAPTION) as f: 188 | captions = json.load(f)['annotations'] 189 | count = 0 190 | for cap in captions: 191 | cap['caption']= re.sub('[^a-zA-Z]+', ' ', cap['caption'].encode('utf-8')).lower() 192 | for w in cap['caption'].split(' '): 193 | if w in uwords: 194 | wordFreq[w]+=1 195 | else: 196 | wordFreq[w] =1 197 | uwords.add(w) 198 | 
#op_on_caption(cap['caption']) 199 | if True or count < 100: 200 | x[cap['image_id']] = cap['caption'] 201 | count+=1 202 | #nmywords = wordFreq.keys() 203 | #sorted(nmywords, key=lambda key: wordFreq[key], reverse=True) 204 | #print(wordFreq.keys()) 205 | mywords = set([w for w in wordFreq.keys() if wordFreq[w]>=WORD_MIN_FREQ]) 206 | mywords.add(ENG_SOS) 207 | mywords.add(ENG_EOS) 208 | mywords.add(ENG_NONE) 209 | mywords.add(ENG_EXTRA) 210 | 211 | #print(mywords) 212 | print(len(mywords)) 213 | #mywords = mywords[:WORD_TOP] 214 | with open(ICAPPF,'w') as f: 215 | pickle.dump([x,mywords],f) 216 | print("Image Caption Pair Data Model Saved") 217 | 218 | 219 | logger.debug("Completed, Vocab Size NONE ")#%len(v_word2ind)) 220 | return (x,mywords) 221 | 222 | #def rgb2gray(rgb): 223 | # return np.dot(rgb[...,:3], [0.299, 0.587, 0.114]) 224 | 225 | def imageToVec(_id): 226 | NEED_W = 224 227 | NEED_H = 224 228 | if type("")==type(_id): 229 | fname = _id 230 | else: 231 | fname = get_image_fname(_id) 232 | #afname = DIR_IMAGESP + fname.split('/')[-1] + '.pickle' 233 | #if os.path.exists(afname): 234 | # with open(afname,'r') as f: 235 | # return pickle.load(f) 236 | #print(fname) 237 | img = image.load_img(fname, target_size=(NEED_H, NEED_W)) 238 | x = image.img_to_array(img) 239 | x /= 255. 240 | x -= 0.5 241 | x *= 2. 242 | x = np.asarray(x) 243 | #with open(afname,'w') as f: 244 | # pickle.dump(x,f) 245 | return x 246 | 247 | ############################################ REMOVE HERE ### 248 | #img.save("temp.jpg") 249 | #img = cv2.imread(fname) 250 | #print(img) 251 | #img = cv2.resize(img, (NEED_H, NEED_W)) 252 | #cv2.imwrite('test.jpg',img) 253 | #img = np.asarray(img) 254 | #print("Shape %s " % (str(np.shape(img)))) 255 | #cv2.imwrite('temp.jpg',img) 256 | #vec = np.asarray(img) 257 | #if not vec.any(): 258 | # badLogs("All zero for %s\n" % str(_id)) 259 | #vec = vec/255.0 260 | #return vec 261 | #bw = rgb2gray(img) 262 | #print("BW Shape %s " % (str(np.shape(bw)))) 263 | 264 | 265 | def getWord2Ind(w): 266 | w=w.lower() 267 | if w not in v_word2ind.keys(): 268 | w=ENG_EXTRA 269 | #print(w) 270 | return v_word2ind[w] 271 | 272 | def word2embd(word): 273 | if word not in embeddingIndexRef[0].keys(): 274 | word = ENG_EXTRA 275 | return embeddingIndexRef[0][word] 276 | 277 | def embdToWord(embd): 278 | bestWord = None 279 | distance = float('inf') 280 | for word in embeddingIndex.keys(): 281 | e=embeddingIndex[word] 282 | d = 0 283 | for a,b in zip(e,embd): 284 | d+=(a-b)*(a-b) 285 | if d0 355 | assert len(v_ind2word) == 0 356 | assert len(v_word2ind) == 0 357 | counter = 0 358 | for w in embeddingIndex.keys(): 359 | if w in topwords: 360 | v_ind2word[counter]=w 361 | v_word2ind[w]=counter 362 | counter += 1 363 | # ENG_* words present in embeddingIndex and topwords 364 | VOCAB_SIZE[0] = counter 365 | #print("Embedding Index Len %d " % len(embeddingIndex.keys())) 366 | #exit() 367 | print("TOPWords %d " % len(topwords)) 368 | print("Embeddding Words %d " % len(embeddingIndex.keys())) 369 | print("Cal Vocab Size %d " % VOCAB_SIZE[0]) 370 | 371 | with open(VOCAB_FILE,'w') as f: 372 | pickle.dump([v_ind2word,v_word2ind, VOCAB_SIZE[0]],f) 373 | print("Vocab Model Saved") 374 | 375 | assert ENG_SOS in v_word2ind.keys() 376 | assert ENG_EOS in v_word2ind.keys() 377 | assert ENG_NONE in v_word2ind.keys() 378 | assert ENG_EXTRA in v_word2ind.keys() 379 | print("Vocabulary Size %d for %d captions" % (VOCAB_SIZE[0], len(lst))) 380 | return lst 381 | 382 | def feed_image_caption(_id,lst): 383 | 
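# Returns (decoder input, image vector, decoder target): the input caption
# is GloVe-encoded and prefixed with SOS, the target is one-hot encoded and
# suffixed with EOS, i.e. the usual one-step-shifted teacher-forcing pair.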
img,capGl,capOH = get_image_caption(_id,lst) 384 | # Glove 385 | we_sos = [word2embd(ENG_SOS)] 386 | we_eos = [word2embd(ENG_EOS)] 387 | # One Hot 388 | we_eosOH = [wordToEncode(ENG_EOS,encodeType="onehot")] 389 | return ( (we_sos+list(capGl)), img, (list(capOH) + we_eosOH)) 390 | 391 | def datas_from_ids(idlst,lst): 392 | images = [] 393 | capS = [] 394 | capE = [] 395 | for _id in idlst: 396 | _capS,_img,_capE = feed_image_caption(_id,lst) 397 | images.append(_img) 398 | capS.append(_capS) 399 | capE.append(_capE) 400 | return [[np.asarray(capS),np.asarray(images)],np.asarray(capE)] 401 | 402 | # Train for batch order 0,0,1,0,1,2,0,1,2,3,4,0,1,2,3,4,5.. 403 | def data_generator(lst, batch_size, start=0, isTrainSet = True): 404 | count = (len(lst.keys()))//batch_size 405 | #print("Max Unique Batches %d " % count) 406 | countValidation = 5#100 407 | countTrain = count - 100 408 | print("Validation Data : %d , Train Batches %d, BatchSize %d\tBatchOffset : %d" % (countValidation, countTrain, batch_size, start)) 409 | offset = 0 410 | left = countTrain 411 | extra = 0 412 | #start = 0 413 | if not isTrainSet: 414 | # Validation Data 415 | left = countValidation 416 | offset = countTrain * batch_size 417 | idlst = lst.keys()[offset:offset+left] 418 | yield datas_from_ids(idlst,lst) 419 | return 420 | # Training Data 421 | maxSequenceLength = countTrain*(countTrain+1)//2 422 | cbatch = 1 423 | batchId = 1 424 | iterBatch = 0 425 | 426 | for it in range(maxSequenceLength): 427 | if batchId == cbatch: 428 | batchId = 1 429 | cbatch *= 2 430 | if cbatch > countTrain: 431 | cbatch = countTrain 432 | else: 433 | batchId += 1 434 | 435 | iterBatch+=1 436 | if iterBatch<=start: 437 | continue 438 | idlst = lst.keys()[(batchId-1)*batch_size:(batchId)*batch_size] 439 | print("Batch Id %d Loaded" % (batchId-1)) 440 | yield datas_from_ids(idlst,lst) 441 | return 442 | 443 | def build_dataset(lst, batch_size = -1, val_size = 0,outerepoch=random.randint(0,10000)): 444 | logger.debug("Started") 445 | 446 | #_id = lst.keys()[0] 447 | #imageToVec(_id) 448 | #capVec = captionToVec(lst[_id]) 449 | #print(capVec) 450 | #print("Shape of CapVec %s " % str(np.shape(capVec))) 451 | train_set = [] 452 | val_set = [] 453 | if batch_size == -1: 454 | for i,_id in enumerate(lst.keys()): 455 | if i > 100: 456 | break 457 | train_set.append( get_image_caption(_id,lst)) 458 | else: 459 | tsize = batch_size 460 | count = (len(lst.keys())-val_size)//tsize 461 | print("Max Unique Outer Batches %d " % count) 462 | outerepoch = outerepoch%count 463 | oinds = outerepoch*tsize 464 | einds = (outerepoch+1)*tsize 465 | mylst = lst.keys()[oinds:einds] 466 | mylst.extend(lst.keys()[-val_size-1:]) 467 | mx = len(mylst) 468 | #mx = 1000 #########HERE########### 469 | splitKey = tsize #int(mx*0.9) 470 | 471 | print("Max Keys %d\tSplit keys %d" % (mx, splitKey)) 472 | todolist = [("Train set",train_set, batch_size,0,splitKey),("Validation Set",val_set, val_size,splitKey,mx-splitKey)] 473 | for (s,cset, batchsz, offset, datasz) in todolist: 474 | #indicies = np.random.choice(datasz, batchsz, replace=False) 475 | #indicies = indicies + offset 476 | for c,_id in enumerate(mylst[offset:(datasz+offset)]):# enumerate(indicies): 477 | #_id = lst.keys()[i] 478 | capimg = get_image_caption(_id,lst) 479 | #if c==0: 480 | # print("%s First Image Id %s with caption : %s " % (s,str(_id), capimg[0])) 481 | cset.append(capimg) 482 | if (c*100)%batchsz == 0: 483 | print("%s %d %% Loaded!" 
% (s, c*100/batchsz)) 484 | print("BS %d, VS %d " % (batch_size, val_size)) 485 | print("Shape of Training Set %s " % str(np.shape(train_set))) 486 | print("Shape of Validation Set %s " % str(np.shape(val_set))) 487 | logger.debug("Completed") 488 | return [train_set, val_set] 489 | 490 | ''' 491 | def train_generator(dataset): 492 | i = 0 493 | while i [caption] 66 | self.captions = dict() 67 | idcreated = set() 68 | # Training Set 69 | for sen in data_train['sentences']: 70 | _id = self.stringIdToInt(sen['video_id']) 71 | if _id not in idcreated: 72 | idcreated.add(_id) 73 | self.captions[_id] = [] 74 | self.captions[_id].append(sen['caption']) 75 | self.train_ids = list(idcreated) 76 | 77 | idcreated = set() 78 | # Test Set 79 | for sen in data_test['sentences']: 80 | _id = self.stringIdToInt(sen['video_id']) 81 | if _id not in idcreated: 82 | idcreated.add(_id) 83 | self.captions[_id] = [] 84 | self.captions[_id].append(sen['caption']) 85 | self.test_ids = list(idcreated) 86 | 87 | def set_vmodel(self,vmodel): 88 | self.vmodel = vmodel 89 | 90 | def getCaptionData(self): 91 | return self.captions 92 | 93 | def stringIdToInt(self,sid): 94 | assert(sid[:5]=='video') 95 | return int(sid[5:]) 96 | 97 | def getAllIds(self): 98 | return self.captions.keys() 99 | 100 | def getDownloadedIds(self): 101 | allfiles = os.listdir(self.vdir) 102 | vfiles = [] 103 | for f in allfiles: 104 | # Issue: getDownloadedIds is called before creation of *_ignore file 105 | # Program crases onces for creating these files then works normally 106 | if f.endswith(".mp4") and (not os.path.exists(os.path.join(self.vdir,f+"_ignore"))): 107 | if os.path.getsize("%s/%s" % (self.vdir,f)) >= VideoHandler.STHRES: 108 | vfiles.append(int(f[:-4])) 109 | return vfiles 110 | 111 | def filterMod100(self, parentlst, lst, _min, _max): 112 | parentlst = set(parentlst) 113 | lst = set(lst) 114 | flst = lst.intersection(parentlst) 115 | lst = list(flst) 116 | ids = [] 117 | for i,_id in enumerate(lst): 118 | if (i%100)>=_min and (i%100)<_max: 119 | ids.append(_id) 120 | return ids 121 | 122 | def getTrainingIds(self): 123 | return self.filterMod100(self.get_otrain_ids(), self.getDownloadedIds(), 0, self.splitTrainValid[0]) 124 | 125 | def getValidationIds(self): 126 | return self.filterMod100(self.get_otrain_ids(), self.getDownloadedIds(), self.splitTrainValid[0],100) 127 | 128 | def getTestIds(self): 129 | return self.filterMod100(self.get_otest_ids(), self.getDownloadedIds(), 0, 100) 130 | 131 | def get_otrain_ids(self): 132 | return self.train_ids 133 | 134 | def get_otest_ids(self): 135 | return self.test_ids 136 | 137 | def getYoutubeId(self,url): 138 | query = urllibparse.parse_qs(urllibparse.urlparse(url).query) 139 | print(query) 140 | return query['v'][0] 141 | 142 | def downloadVideo(self, _id, logs = True): 143 | video = self.vdata[_id] 144 | url = video['url'] 145 | stime = video['start time'] 146 | etime = video['end time'] 147 | sfname = "%s/%d.mp4" % (self.vdir, _id) 148 | if os.path.exists(sfname): 149 | if logs: 150 | print("Video Id [%d] Already Downloaded" % _id) 151 | return sfname 152 | youtubeId = self.getYoutubeId(url) 153 | turl = "curl 'https://hesetube.com/download.php?id=%s'" % (youtubeId) 154 | durl = "https://hesetube.com/video/%s.mp4?start=%f&end=%f" % (youtubeId, stime, etime) 155 | print(durl) 156 | print(turl) 157 | os.system(turl) 158 | cont = urllib.urlopen(durl).read() 159 | with open(sfname,"wb") as f: 160 | f.write(cont) 161 | print("Video Id [%d] Downloaded : %s " % (_id, youtubeId)) 162 | fs 
= os.path.getsize(sfname) 163 | if fs < VideoHandler.STHRES: 164 | print("Crosscheck failed, File Size : %d" % fs) 165 | with open(self.logfile,"a") as f: 166 | f.write("Crosscheck file %d, %s with size %d\n" % (_id, youtubeId, fs)) 167 | os.remove(sfname) 168 | open(sfname,'a').close() 169 | self.takebreak() 170 | return None 171 | else: 172 | self.takebreak() 173 | return sfname 174 | 175 | def takebreak(self): 176 | time.sleep(VideoHandler.SLEEPTIME) 177 | 178 | ''' 179 | Either frames of video from id or vfname 180 | ''' 181 | CRAZY = 0 182 | #@synchronized 183 | def get_crazy_id(self): 184 | VideoHandler.EXTRACT_COUNTER += 1 185 | return VideoHandler.EXTRACT_COUNTER 186 | 187 | def get_iframes_cached(self, _id): 188 | cfname = "%s/%d.npy" % (self.cdir, _id) 189 | if os.path.exists(cfname): 190 | f = open(cfname, 'rb') 191 | frames = np.load(f) 192 | assert len(frames) == self.LIMIT_FRAMES 193 | return frames 194 | return None 195 | 196 | def get_audio_cached(self, _id): 197 | afname = "%s/%d.npy" % (self.adir, _id) 198 | if os.path.exists(afname): 199 | f = open(afname, 'rb') 200 | feature = np.load(f) 201 | if np.shape(feature) != self.AUDIO_FEATURE: 202 | print("Feature Shape error at %d, %s" % (_id, np.shape(feature))) 203 | assert np.shape(feature) == self.AUDIO_FEATURE 204 | return feature 205 | return None 206 | 207 | def cached_iframe(self, _id, frames): 208 | cfname = "%s/%d.npy" % (self.cdir, _id) 209 | print("Cached %s" % cfname) 210 | with open(cfname, 'wb') as f: 211 | np.save(f,frames) 212 | 213 | def cached_audio(self, _id, feature): 214 | afname = "%s/%d.npy" % (self.adir, _id) 215 | print("Cached %s" % afname) 216 | with open(afname, 'wb') as f: 217 | np.save(f,feature) 218 | 219 | def file_to_videofeature(self, sfname): 220 | vcap = cv2.VideoCapture(sfname) 221 | success, frame = vcap.read() 222 | allframes = [] 223 | while True: 224 | success, frame = vcap.read() 225 | if not success: 226 | break 227 | allframes.append(cv2.resize(frame, VideoHandler.SHAPE)) 228 | if len(allframes) < self.LIMIT_FRAMES: 229 | print("File [%s] with limited frames (%d)" % (sfname, len(allframes))) 230 | # Ignore those videos 231 | os.system("touch %s_ignore" % sfname) 232 | return None 233 | 234 | period = len(allframes) // self.LIMIT_FRAMES 235 | rframes = allframes[:period * self.LIMIT_FRAMES:period] 236 | frames_out = self.vmodel.preprocess_partialmodel(rframes) 237 | return frames_out 238 | 239 | def file_to_audiofeature(self, sfname): 240 | audio_y, sr = librosa.load(sfname) 241 | afeatures = librosa.feature.mfcc(y=audio_y, sr=sr, n_mfcc=self.AUDIO_FEATURE[1]) 242 | afeatures = np.transpose(afeatures) 243 | ll = len(afeatures) 244 | parts = ll//self.AUDIO_FEATURE[0] 245 | division = [] 246 | for i in range(self.AUDIO_FEATURE[0] - 1): 247 | division.append((i+1)*parts) 248 | for i in range(ll%self.AUDIO_FEATURE[0]):#left over 249 | division[i]+=1 250 | afeatures = np.split(np.array(afeatures), division) 251 | afeature_out = [] 252 | for af in afeatures: 253 | afeature_out.append(np.mean(np.array(af),axis = 0)) 254 | afeature_out = np.asarray(afeature_out) 255 | if np.shape(afeature_out) != self.AUDIO_FEATURE: 256 | print("File [%s] with audio problem (%s)" % (sfname, str(np.shape(afeature_out)))) 257 | # Ignore videos 258 | os.system("touch %s_ignore" % sfname) 259 | return afeature_out 260 | 261 | # (Video Feature, Audio Feature) 262 | def get_iframes_audio(self, _id = None, sfname = None, logs = True, cache_id = None): 263 | assert (_id is None) ^ (sfname is None) 264 | # Load if 
cached 265 | frames_out = None 266 | afeature_out = None 267 | if _id is not None or cache_id is not None: 268 | if _id is not None: 269 | cache_id = _id 270 | frames_out = self.get_iframes_cached(cache_id) 271 | afeature_out = self.get_audio_cached(cache_id) 272 | if frames_out is not None and afeature_out is not None: 273 | return (frames_out, afeature_out) 274 | # Load frames from file 275 | if sfname is None: 276 | sfname = self.downloadVideo(_id, logs) 277 | if sfname is None: 278 | return None 279 | 280 | to_cache_video = False 281 | to_cache_audio = False 282 | 283 | if frames_out is None: 284 | frames_out = self.file_to_videofeature(sfname) 285 | to_cache_video = True 286 | 287 | if afeature_out is None: 288 | afeature_out = self.file_to_audiofeature(sfname) 289 | to_cache_audio = True 290 | 291 | # Cache it 292 | if _id is not None or cache_id is not None: 293 | if _id is not None: 294 | cache_id = _id 295 | if to_cache_video: 296 | self.cached_iframe(cache_id, frames_out) 297 | if to_cache_audio: 298 | self.cached_audio(cache_id, afeature_out) 299 | return (frames_out, afeature_out) 300 | 301 | def get_frames(self,_id = None, sfname = None, logs = True): 302 | assert (_id is None) ^ (sfname is None) 303 | if sfname is None: 304 | sfname = self.downloadVideo(_id, logs) 305 | if sfname is None: 306 | return None 307 | edir = "%s/v_%d" % (self.tdir, self.get_crazy_id()) 308 | if os.path.exists(edir): 309 | shutil.rmtree(edir) 310 | os.mkdir(edir) 311 | cmd = "ffmpeg -i %s -vf fps=%d -s %dx%d %s/0_%%03d.jpg &> /dev/null" % ( 312 | sfname, 5, VideoHandler.SHAPE[0], VideoHandler.SHAPE[1], edir) #&> /dev/null 313 | if logs: 314 | print(cmd) 315 | returnStatus = os.system(cmd) 316 | if returnStatus != 0: 317 | print("Extracting Failed : %s" % sfname) 318 | if os.path.exists(edir): 319 | print(cmd) 320 | print("Dir Exists") 321 | #shutil.rmtree(edir) 322 | return None 323 | files = os.listdir(edir) 324 | files = [("%s/%s"%(edir,f)) for f in files] 325 | LIMIT_FRAMES = 10 326 | if len(files) captions) 39 | logger.debug("Glove File %s\nEmbedding File %s\nVocab File %s\n" % (GLOVE_FILE, WORD_EMBEDDED_CACHE, VOCAB_FILE)) 40 | self.specialWords = dict() 41 | self.specialWords['START'] = '>' 42 | self.specialWords['END'] = '<' 43 | self.specialWords['NONE'] = '?!?' 
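# Special tokens: START ('>') prefixes decoder input, END ('<') marks the
# end of a caption, NONE ('?!?') pads to CAPTION_LEN, and EXTRA ('___',
# defined next) stands in for out-of-vocabulary words (see get_filteredword).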
44 | self.specialWords['EXTRA'] = '___' 45 | 46 | freshWordEmbedding = self.loadWordEmbedding(GLOVE_FILE) 47 | for word,enc in self.specialWords.items(): 48 | assert enc in self.wordEmbedding.keys() 49 | self.buildVocab(data, train_ids, freshWordEmbedding) 50 | logger.debug("Vocab Build Completed") 51 | 52 | def loadWordEmbedding(self, glove_file): 53 | self.wordEmbedding = loadFromPickleIfExists(WORD_EMBEDDED_CACHE) 54 | if self.wordEmbedding: 55 | logger.debug("Embedding Loaded") 56 | return False 57 | else: 58 | self.wordEmbedding = dict() 59 | with open(glove_file, 'r') as f: 60 | for i,line in enumerate(f): 61 | tokens = line.split() 62 | tokens = [tok.__str__() for tok in tokens] 63 | word = tokens[0] 64 | self.wordEmbedding[word] = np.asarray(tokens[1:], dtype='float32') 65 | minVal = float('inf') 66 | maxVal = -minVal 67 | for v in self.wordEmbedding.values(): 68 | for x in v: 69 | minVal = min(minVal,x) 70 | maxVal = max(maxVal,x) 71 | mapper = interp1d([minVal,maxVal],[-1,1]) 72 | logger.info("Mapping minVal[%f], maxVal[%f] to [-1,1] " % (minVal,maxVal)) 73 | for w in self.wordEmbedding: 74 | self.wordEmbedding[w] = mapper(self.wordEmbedding[w]) 75 | print("Cross Check") 76 | print(self.wordEmbedding['good']) 77 | self.saveEmbedding() 78 | return True 79 | 80 | def saveEmbedding(self): 81 | with open(WORD_EMBEDDED_CACHE, 'wb') as f: 82 | pickle.dump(self.wordEmbedding,f) 83 | logger.info("Embedding Saved!") 84 | 85 | def buildVocab(self, data, train_ids, trimEmbedding): 86 | self.ind2word = loadFromPickleIfExists(VOCAB_FILE) 87 | if not self.ind2word: 88 | logger.debug("Building Vocab") 89 | x = {} 90 | allWords = set() 91 | for w in self.wordEmbedding.keys(): 92 | allWords.add(w) 93 | logger.debug("Cached all Embedded Words") 94 | for _id,captions in data.items(): 95 | if _id not in train_ids: 96 | continue 97 | for cap in captions: 98 | for w in caption_tokenize(cap): 99 | if w not in allWords: 100 | continue 101 | if w not in x.keys(): 102 | x[w]=1 103 | else: 104 | x[w]+=1 105 | assert 'tshirt' not in x.keys() 106 | assert 'tshirt' not in allWords 107 | logger.debug("Iterated over all captions") 108 | self.ind2word = [] 109 | for w,enc in self.specialWords.items(): 110 | self.ind2word.append(enc) 111 | self.ind2word.extend([w for w in x.keys() if x[w]>=Vocab.WORD_MIN_FREQ]) 112 | with open(VOCAB_FILE,'wb') as f: 113 | pickle.dump(self.ind2word,f) 114 | logger.debug("Vocab File saved") 115 | logger.info("Vocab Size : %d"%len(self.ind2word)) 116 | self.word2ind = dict() 117 | for i,w in enumerate(self.ind2word): 118 | self.word2ind[w]=i 119 | assert 'tshirt' not in self.wordEmbedding.keys() 120 | assert 'tshirt' not in self.word2ind.keys() 121 | logger.debug("Words to be in vocab %d found %d" % (Vocab.VOCAB_SIZE, len(self.ind2word))) 122 | assert len(self.ind2word) == Vocab.VOCAB_SIZE 123 | if trimEmbedding: 124 | newEmbedding = dict() 125 | logger.debug("Trimming Word Embedding") 126 | for w in self.ind2word: 127 | newEmbedding[w] = self.wordEmbedding[w] 128 | self.wordEmbedding=newEmbedding 129 | logger.debug("Trimming Word Embedding Done") 130 | self.saveEmbedding() 131 | 132 | def get_filteredword(self,w): 133 | if w in self.word2ind.keys(): 134 | return w 135 | return self.specialWords['EXTRA'] 136 | 137 | def fit_caption_tokens(self,tokens,length,addPrefix,addSuffix): 138 | tok = [] 139 | tokens = tokens[0:length] 140 | if addPrefix: 141 | tok.append(self.specialWords['START']) 142 | tok.extend(tokens) 143 | if addSuffix: 144 | tok.append(self.specialWords['END']) 145 | 
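# Pad with NONE up to `length`; with exactly one of addPrefix/addSuffix set
# (as both call sites below use) the result has length+1 tokens, e.g.
# fit_caption_tokens(['a','dog'], 10, True, False)
# -> ['>', 'a', 'dog'] plus eight NONE ('?!?') tokens.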
for i in range(length-len(tokens)): 146 | tok.append(self.specialWords['NONE']) 147 | return tok 148 | 149 | def onehot_word(self,w): 150 | encode = [0] * Vocab.VOCAB_SIZE 151 | encode[self.word2ind[w]] = 1 152 | return encode 153 | 154 | def word_fromonehot(self, onehot): 155 | index = np.argmax(onehot) 156 | return self.ind2word[index] 157 | 158 | def get_caption_encoded(self,caption,glove, addPrefix, addSuffix): 159 | tokens = caption_tokenize(caption) 160 | tokens = self.fit_caption_tokens(tokens, Vocab.CAPTION_LEN, addPrefix, addSuffix) 161 | tokens = [self.get_filteredword(x) for x in tokens] 162 | # logger.debug("Working on Caption %s " % str(tokens)) 163 | if glove: 164 | return [self.wordEmbedding[x] for x in tokens] 165 | else: 166 | return [self.onehot_word(x) for x in tokens] 167 | 168 | def get_caption_from_indexs(self,indx): 169 | s = ' '.join([self.ind2word[x] for x in indx]) 170 | return s 171 | 172 | def vocabBuilder(): 173 | vHandler = VideoHandler(VideoHandler.s_fname_train, VideoHandler.s_fname_test) 174 | train_ids = vHandler.get_otrain_ids() 175 | captionData = vHandler.getCaptionData() 176 | vocab = Vocab(captionData, train_ids) 177 | return [vHandler, vocab] 178 | 179 | if __name__ == "__main__": 180 | vocabBuilder() -------------------------------------------------------------------------------- /src/backend/vpreprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import os 4 | 5 | from keras.preprocessing import image 6 | 7 | from common.config import get_vpreprocess_config 8 | from common.logger import logger 9 | from backend.vocab import vocabBuilder 10 | 11 | 12 | BADLOGS = get_vpreprocess_config()["LOGS_DIR"] 13 | 14 | def badLogs(msg): 15 | logger.debug(msg) 16 | with open(BADLOGS,"a") as f: 17 | f.write(msg) 18 | 19 | class Preprocessor: 20 | def __init__(self): 21 | self.vHandler,self.vocab = vocabBuilder() 22 | 23 | def set_vmodel(self, vmodel): 24 | self.vHandler.set_vmodel(vmodel) 25 | 26 | def imageToVec(self, fname): 27 | NEED_W = 224 28 | NEED_H = 224 29 | img = image.load_img(fname, target_size=(NEED_H, NEED_W)) 30 | x = image.img_to_array(img) 31 | x /= 255. 32 | x -= 0.5 33 | x *= 2. 
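# Pixels now lie in [-1, 1]: /255 maps to [0,1], -0.5 centers, *2 rescales.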
34 | return x 35 | 36 | ''' 37 | Either convert videos from ids or frame file names 38 | ''' 39 | COUNTER = 0 40 | def videoToVec(self, _id = None, vfname = None, cache_id = None): 41 | assert (_id is None) ^ (vfname is None) 42 | if not _id == None: 43 | out = self.vHandler.get_iframes_audio(_id = _id, logs = False) 44 | else: 45 | out = self.vHandler.get_iframes_audio(sfname = vfname, logs = False, cache_id = cache_id) 46 | if out is None: 47 | return None 48 | (frames, afeatures) = out 49 | return frames, afeatures 50 | # deprecated 51 | fnames = None 52 | edir = None 53 | if fnames is None: 54 | ef = self.vHandler.get_frames(_id = _id, logs = False) 55 | if ef is not None: 56 | edir, fnames = ef 57 | if fnames is None: 58 | return None 59 | content = [] 60 | for i,fname in enumerate(fnames): 61 | content.append(self.imageToVec(fname)) 62 | self.vHandler.free_frames(edir) 63 | 64 | #if len(fnames)>0: 65 | # os.system("cp \"%s\" ~/TESTING/%04d.jpg" % (fnames[0],Preprocessor.COUNTER)) 66 | # Preprocessor.COUNTER += 1 67 | return content 68 | 69 | def get_video_content(self, vfname, cache_id = None): 70 | return self.videoToVec(vfname = vfname, cache_id = cache_id) 71 | 72 | def get_video_caption(self, _id, just_one_caption = True): 73 | vid_a = self.videoToVec(_id = _id) 74 | if vid_a is None: 75 | return None 76 | (vid, afeature) = vid_a 77 | data = self.vHandler.getCaptionData() 78 | out = [] 79 | for cur_caption in data[_id]: 80 | captionIn = self.vocab.get_caption_encoded(cur_caption, True, True, False) 81 | captionOut = self.vocab.get_caption_encoded(cur_caption, False, False, True) 82 | out.append([afeature,vid,captionIn,captionOut]) 83 | if len(out) == 0: 84 | return None 85 | if just_one_caption: 86 | return [random.choice(out)] 87 | return out 88 | 89 | def datas_from_ids(self, idlst): 90 | logger.debug("\n Loading Video/Captions for ids : %s" % str(idlst)) 91 | afeatures = [] 92 | vids = [] 93 | capIn = [] 94 | capOut = [] 95 | for _id in idlst: 96 | vccs = self.get_video_caption(_id, just_one_caption = True) 97 | if vccs is None: 98 | continue 99 | for vcc in vccs: 100 | _afeature, _vid, _capIn, _capOut = vcc 101 | afeatures.append(_afeature) 102 | vids.append(_vid) 103 | capIn.append(_capIn) 104 | capOut.append(_capOut) 105 | afeatures = np.asarray(afeatures) 106 | capIn = np.asarray(capIn) 107 | capOut = np.asarray(capOut) 108 | vids = np.asarray(vids) 109 | 110 | logger.debug("Shape vids %s [max distinct %d]" % (str(np.shape(vids)),len(idlst))) 111 | logger.debug("Shape afeatures %s" % str(np.shape(afeatures))) 112 | logger.debug("Shape CapIn %s" % str(np.shape(capIn))) 113 | logger.debug("Shape CapOut %s" % str(np.shape(capOut))) 114 | 115 | 116 | return [[capIn,afeatures,vids],capOut] 117 | 118 | def get_nextbatch(self, batch_size, arr_counter, ids): 119 | assert len(ids) > 0 120 | count = arr_counter[0] 121 | start = (count * batch_size) % len(ids) 122 | idlst = [] 123 | for i in xrange(batch_size): 124 | idlst.append(ids[start]) 125 | start = (start + 1) % len(ids) 126 | 127 | count = (count +1 ) % len(ids) 128 | arr_counter[0] = count 129 | return idlst 130 | 131 | ''' 132 | typeSet 0:Training dataset, 1: Validation dataset, 2: Test Dataset 133 | ''' 134 | # Sequential 135 | def data_generator(self, batch_size, start=0, typeSet = 0): 136 | if typeSet == 0: 137 | ids = self.vHandler.getTrainingIds() 138 | elif typeSet == 1: 139 | ids = self.vHandler.getValidationIds() 140 | elif typeSet == 2: 141 | ids = self.vHandler.getTestIds() 142 | else: 143 | assert False 144 | 
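# ids are shuffled once up front; get_nextbatch then walks them
# sequentially with wrap-around, so each id is visited once per cycle.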
random.shuffle(ids) 145 | arr_counter = [0] 146 | count = (len(ids)+batch_size-1)//batch_size 147 | assert count > 0 148 | logger.debug("Max Batches of type %d : %d " % (typeSet, count)) 149 | while True: 150 | idlst = self.get_nextbatch(batch_size, arr_counter, ids) 151 | data = self.datas_from_ids(idlst) 152 | ndata = [] 153 | for d in data: 154 | if d is not None: 155 | ndata.append(d) 156 | if len(ndata) > 0: 157 | yield ndata 158 | 159 | # Random 160 | def data_generator_random(self, batch_size, start=0, typeSet = 0): 161 | if typeSet == 0: 162 | ids = self.vHandler.getTrainingIds() 163 | elif typeSet == 1: 164 | ids = self.vHandler.getValidationIds() 165 | elif typeSet == 2: 166 | ids = self.vHandler.getTestIds() 167 | else: 168 | assert False 169 | random.shuffle(ids) 170 | count = (len(ids) + batch_size - 1)//batch_size 171 | assert count > 0 172 | if start == -1: 173 | start = random.randint(0,count) 174 | logger.debug("Max Batches of type %d : %d " % (typeSet, count)) 175 | #start = start % count 176 | while True: 177 | bs = batch_size 178 | if bs>len(ids): 179 | bs=len(ids) 180 | logger.debug("FORCE Reducing Batch Size to %d from %d",bs,batch_size) 181 | idlst = random.sample(ids,bs) 182 | data = self.datas_from_ids(idlst) 183 | ndata = [] 184 | for d in data: 185 | if d is not None: 186 | ndata.append(d) 187 | if len(ndata) > 0: 188 | yield ndata 189 | #start = (start + 1)%count 190 | -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/common/__init__.py -------------------------------------------------------------------------------- /src/common/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration Parser for V2D 3 | """ 4 | 5 | import json 6 | import threading 7 | import os 8 | 9 | lock = threading.Lock() 10 | 11 | def get_config(): 12 | with lock: 13 | if hasattr(get_config, "config"): 14 | return get_config.config 15 | 16 | fname = os.environ.get("V2D_CONFIG_FILE", "config.json") 17 | with open(fname, "r") as fin: 18 | get_config.config = json.load(fin) 19 | return get_config.config 20 | 21 | def clear(): 22 | with lock: 23 | if hasattr(get_config, "config"): 24 | delattr(get_config, "config") 25 | 26 | def get_app_config(): 27 | return get_config()["app"] 28 | 29 | def get_rpc_config(): 30 | return get_config()["rpc"] 31 | 32 | def get_vpreprocess_config(): 33 | return get_config()["vpreprocess"] 34 | 35 | def get_vocab_config(): 36 | return get_config()["vocab"] 37 | 38 | def get_tests_config(): 39 | return get_config()["tests"] 40 | -------------------------------------------------------------------------------- /src/common/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger('root') 4 | FORMAT = "[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s" 5 | logging.basicConfig(format=FORMAT) 6 | logger.setLevel(logging.DEBUG) 7 | -------------------------------------------------------------------------------- /src/common/rpc.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import traceback 3 | 4 | from six.moves.xmlrpc_client import ServerProxy 5 | from six.moves.xmlrpc_server import SimpleXMLRPCServer 6 | 7 | from 
common.config import get_rpc_config 8 | from common.logger import logger 9 | 10 | 11 | CONFIG = get_rpc_config() 12 | SERVER_RUNAS = CONFIG["RPC_SERVER_RUNAS"] 13 | PORT = CONFIG["RPC_PORT"] 14 | SERVER_IP = CONFIG["RPC_ENDPOINT"] 15 | 16 | lock = threading.Lock() 17 | 18 | def rpc_decorator(f): 19 | def new_f(*args, **kwargs): 20 | try: 21 | return f(*args, **kwargs) 22 | except Exception as e: 23 | tb = traceback.format_exc() 24 | logger.error("Exception raised in rpc %s, %s\n%s" % (f, e, tb)) 25 | raise e 26 | return new_f 27 | 28 | def close_framework(): 29 | exit() 30 | 31 | def register_server(framework): 32 | print('Preparing for Register Server') 33 | server = SimpleXMLRPCServer((SERVER_RUNAS, PORT)) 34 | print('Listening to %d' % PORT) 35 | server.register_function(rpc_decorator(framework.predict_fnames), 'predict_fnames') 36 | server.register_function(rpc_decorator(framework.predict_ids), 'predict_ids') 37 | server.register_function(rpc_decorator(framework.get_weights_status), 'get_weights_status') 38 | server.register_function(rpc_decorator(close_framework), 'close_framework') 39 | print("[RPC][Server][Started]") 40 | try: 41 | server.serve_forever() 42 | except KeyboardInterrupt: 43 | raise 44 | except Exception: 45 | raise 46 | finally: 47 | print("[RPC][Server][Closing]") 48 | server.server_close() 49 | 50 | 51 | def get_rpc(): 52 | with lock: 53 | if hasattr(get_rpc, 'proxy'): 54 | return get_rpc.proxy 55 | get_rpc.proxy = ServerProxy("http://%s:%d/" % (SERVER_IP, PORT)) 56 | return get_rpc.proxy 57 | -------------------------------------------------------------------------------- /src/common/status.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class ModelWeightsStatus(Enum): 4 | NO_INFO = 0 5 | SUCCESS = 1 6 | MODEL_NOT_FOUND = 2 7 | WIP = 3 -------------------------------------------------------------------------------- /src/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": { 3 | "PREDICT_MODE_ONLY": true, 4 | "PREFIX":"/home/scopeinfinity/BTP/Video2Description/src/", 5 | "MAX_CONTENT_LENGTH": 10649600, 6 | "UPLOAD_FOLDER": "/home/scopeinfinity/BTP/Video2Description/uploads/", 7 | "VIDEOS_DATASET": "/home/scopeinfinity/BTP/Video2Description/dataset/", 8 | "VIDEOS_FOLDER": "/home/scopeinfinity/BTP/Video2Description/dataset/videos/", 9 | "DATASET_CACHE": "/home/scopeinfinity/BTP/Video2Description/dataset_cache/" 10 | }, 11 | "rpc": { 12 | "RPC_SERVER_RUNAS": "127.0.0.1", 13 | "RPC_PORT": 5001, 14 | "RPC_ENDPOINT": "127.0.0.1" 15 | }, 16 | "vpreprocess": { 17 | "COCOFNAME": "/home/scopeinfinity/cococaption/cocoeval.py", 18 | "LOGS_DIR": "/tmp/v2d_vpreprocess.log" 19 | }, 20 | "vocab": { 21 | "GLOVE_FILE": "/home/scopeinfinity/BTP/Video2Description/dataset/glove.6B.300d.txt", 22 | "WORD_EMBEDDED_CACHE": "/home/scopeinfinity/BTP/Video2Description/dataset_cache/glove_300.dat", 23 | "VOCAB_FILE": "/home/scopeinfinity/BTP/Video2Description/dataset_cache/vocab.dat" 24 | }, 25 | "tests": { 26 | "dir_videos": "./tests/data/videos" 27 | } 28 | } -------------------------------------------------------------------------------- /src/config_docker.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": { 3 | "PREDICT_MODE_ONLY": true, 4 | "PREFIX":"/home/si/v2d/src", 5 | "MAX_CONTENT_LENGTH": 10649600, 6 | "UPLOAD_FOLDER": "/home/si/v2d/uploads/", 7 | "VIDEOS_DATASET": "/home/si/v2d/dataset/", 8 | 
"VIDEOS_FOLDER": "/home/si/v2d/dataset/videos/", 9 | "DATASET_CACHE": "/home/si/v2d/dataset_cache/" 10 | }, 11 | "rpc": { 12 | "RPC_SERVER_RUNAS": "172.14.0.2", 13 | "RPC_PORT": 5001, 14 | "RPC_ENDPOINT": "172.14.0.2" 15 | }, 16 | "vpreprocess": { 17 | "COCOFNAME": "/home/si/coco-caption/pycocoevalcap/eval.py", 18 | "LOGS_DIR": "/var/log/v2d/vpreprocess.log" 19 | }, 20 | "vocab": { 21 | "GLOVE_FILE": "/home/si/v2d/dataset/glove.6B.300d.txt", 22 | "WORD_EMBEDDED_CACHE": "/home/si/v2d/dataset_cache/glove_300.dat", 23 | "VOCAB_FILE": "/home/si/v2d/dataset_cache/vocab.dat" 24 | }, 25 | "tests": { 26 | "dir_videos": "./tests/data/videos" 27 | } 28 | } -------------------------------------------------------------------------------- /src/frontend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/frontend/__init__.py -------------------------------------------------------------------------------- /src/frontend/app.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import random 5 | import re 6 | import traceback 7 | 8 | from copy import deepcopy 9 | from flask import Flask, render_template, request, send_from_directory 10 | from waitress import serve 11 | 12 | from common.config import get_app_config 13 | from common.rpc import get_rpc 14 | from common.status import ModelWeightsStatus 15 | 16 | app = Flask(__name__) 17 | config = get_app_config() 18 | 19 | PREDICT_MODE_ONLY = config["PREDICT_MODE_ONLY"] 20 | PREFIX = config["PREFIX"] 21 | app.config['MAX_CONTENT_LENGTH'] = config["MAX_CONTENT_LENGTH"] 22 | app.config['UPLOAD_FOLDER'] = config["UPLOAD_FOLDER"] 23 | app.config['VIDEOS_FOLDER'] = config["VIDEOS_FOLDER"] 24 | 25 | navigation = [("./","Predict",False)] 26 | 27 | if not PREDICT_MODE_ONLY: 28 | navigation.extend([("./get_ids","Get ID's",False),("./play","Play Videos",False)]) 29 | 30 | # Don't even define the methods! And definately need to improve this. 
31 | def get_train_ids(): 32 | command = "python %s/backend/videohandler.py -strain" % PREFIX 33 | return os.popen(command).read() 34 | 35 | def get_test_ids(): 36 | command = "python %s/backend/videohandler.py -stest" % PREFIX 37 | return os.popen(command).read() 38 | 39 | def get_val_ids(): 40 | command = "python %s/backend/videohandler.py -sval" % PREFIX 41 | return os.popen(command).read() 42 | 43 | def get_all_ids(): 44 | command = "python %s/backend/videohandler.py -sval -stest -strain" % PREFIX 45 | return os.popen(command).read() 46 | 47 | def predict_ids(ids): 48 | proxy = get_rpc() 49 | return proxy.predict_ids(ids) 50 | 51 | @app.route("/play") 52 | def play(): 53 | return render_template('play.html', navigation = getactivenav(2)) 54 | 55 | @app.route("/get_ids") 56 | def get_ids(): 57 | content = dict() 58 | content['ids'] = get_all_ids() 59 | return render_template('get_ids.html', navigation=getactivenav(1), content = content) 60 | 61 | @app.route("/predict") 62 | def predict_page(fnames = None): 63 | if request.args.get('fnames'): 64 | return computeAndRenderPredictionFnames(re.sub("[^0-9 ]", "", request.args.get('fnames'))) 65 | if (not PREDICT_MODE_ONLY) and request.args.get('ids'): 66 | return computeAndRenderPredictionIDs(ids = re.sub("[^0-9 ]", "", request.args.get('ids'))) 67 | return "Invalid Request" 68 | 69 | @app.route('/download', methods=['GET']) 70 | def download_file(): 71 | _id = request.args.get('id') 72 | if _id and unicode(_id).isnumeric(): 73 | fname = str(_id) + ".mp4" # serve the dataset video only if it is present on disk 74 | return send_from_directory(app.config['VIDEOS_FOLDER'], fname) if os.path.exists(os.path.join(app.config['VIDEOS_FOLDER'], fname)) else "File Not Exists" 75 | return "Invalid Request" 76 | 77 | def predict_fnames(fnames): 78 | proxy = get_rpc() 79 | return proxy.predict_fnames(fnames) 80 | 81 | def model_weights_notify(): 82 | proxy = get_rpc() 83 | try: 84 | status = proxy.get_weights_status() 85 | if status == str(ModelWeightsStatus.SUCCESS): 86 | return None 87 | return status 88 | except Exception as e: 89 | print("model_weights_notify failed: %s" % e) 90 | return "Failed to communicate." 91 | 92 | def getactivenav(index): 93 | nav = deepcopy(navigation) 94 | nav[index] = (nav[index][0], nav[index][1], True) 95 | return nav 96 | 97 | @app.route("/model_weights_status") 98 | def model_weights_status(): 99 | return model_weights_notify() or "[SUCCESS]" 100 | 101 | @app.route("/") 102 | def home(): 103 | weights_notify = model_weights_notify() 104 | if PREDICT_MODE_ONLY: 105 | return render_template( 106 | 'publicindex.html', 107 | weights_notify = weights_notify) 108 | else: 109 | return render_template('index.html', navigation = getactivenav(0)) 110 | 111 | def computeAndRenderPredictionIDs(ids): 112 | content = dict() 113 | content['ids'] = ids 114 | content['data_ids'] = predict_ids(ids) 115 | return render_template('predict.html', content = content) 116 | 117 | def computeAndRenderPredictionFnames(fnames): 118 | content = dict() 119 | content['fnames'] = fnames 120 | content['data_fnames'] = predict_fnames(fnames) 121 | return render_template('predict.html', content = content) 122 | 123 | # http://flask.pocoo.org/docs/0.12/patterns/fileuploads/ 124 | def allowed_file(filename): 125 | return '.'
in filename and \ 126 | filename.rsplit('.', 1)[1].lower() in ['mp4'] 127 | 128 | def error(msg): 129 | return json.dumps({'error':msg}) 130 | 131 | def success(data): 132 | return json.dumps({'success':data}) 133 | 134 | @app.route('/upload', methods=['POST']) 135 | def upload_file(): 136 | print(request.files) 137 | if request.method != "POST": 138 | return error("Only POST requests are expected!") 139 | if "file" not in request.files: 140 | return error("No files found!") 141 | file = request.files['file'] 142 | if not file: 143 | return error("No file found!") 144 | if file.filename == '': 145 | return error("No filename found!") 146 | if not allowed_file(file.filename): 147 | return error("Only *.mp4 video files are supported at this moment!") 148 | filename = str(random.randint(0,1000000)) + ".mp4" 149 | filename = os.path.join(app.config["UPLOAD_FOLDER"], filename) 150 | try: 151 | file.save(filename) 152 | print("File uploaded: %s" % filename) 153 | output = json.loads(predict_fnames([filename])) 154 | except Exception as e: 155 | print(traceback.format_exc()) 156 | return error("Request Failed! Exception caught while generating caption.") 157 | finally: 158 | os.unlink(filename) 159 | return success(output) 160 | 161 | def main(): 162 | parser = argparse.ArgumentParser() 163 | parser.add_argument('-p', '--port', type=int, default=5000) 164 | args = parser.parse_args() 165 | serve(app, host='0.0.0.0', port=args.port) 166 | 167 | if __name__ == "__main__": 168 | main() -------------------------------------------------------------------------------- /src/frontend/templates/get_ids.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 | 4 | Train / Validation / Test ID's 5 | {{ content['ids'] }} 6 | 7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /src/frontend/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 | 4 | Predict Using IDs 5 | 6 | 7 | 8 | Enter ID's (space separated) 9 | 10 | 11 | Submit 12 | 13 | 14 | 15 | 16 | 17 | Predict Using File Names 18 | 19 | 20 | 21 | Enter Filenames (space separated) 22 | 23 | 24 | Submit 25 | 26 | 27 | 28 | 29 | 30 | Predict via Uploading Media 31 | 32 | 33 | 34 | Upload File 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | {% endblock %} 43 | -------------------------------------------------------------------------------- /src/frontend/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Video2Description 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 24 | 25 | {% if navigation %} 26 | 27 | 28 | 29 | Video2Description 30 | 31 | 32 | {% for (link,title,active) in navigation %} 33 | {{ title }} 34 | {% endfor %} 35 | 36 | 37 | 38 | {% endif %} 39 | {% block content %}{% endblock %} 40 | 41 | 42 | -------------------------------------------------------------------------------- /src/frontend/templates/play.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 | 4 | 5 | Play Video 6 | 7 | 8 | Enter ID 9 | 10 | 11 | Play 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 34 | 35 | {% endblock %} 36 | -------------------------------------------------------------------------------- /src/frontend/templates/predict.html:
-------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 | {% if content['ids'] %} 4 | 5 | Processed {{ content['ids'] }} 6 | 7 | 8 | {{ content['data_ids'] }} 9 | 10 | 11 | 12 | {% endif %} 13 | {% if content['fnames'] %} 14 | 15 | Processed {{ content['fnames'] }} 16 | 17 | 18 | {{ content['data_fnames'] }} 19 | 20 | 21 | 22 | {% endif %} 23 | {% endblock %} 24 | -------------------------------------------------------------------------------- /src/frontend/templates/publicindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Video2Description 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | {% if weights_notify %} 13 | Backend not ready: {{ weights_notify }} 14 | {% endif %} 15 | 16 | 17 | Describe video using text? 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | Keep Smiling :) 26 | 27 | 28 | 29 | Description of video 30 | Please upload some file. 31 | 32 | 33 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /src/run_tests.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | python -m unittest discover tests/ -------------------------------------------------------------------------------- /src/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/__init__.py -------------------------------------------------------------------------------- /src/tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/data/__init__.py -------------------------------------------------------------------------------- /src/tests/data/fetcher.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from common.config import get_tests_config 4 | 5 | 6 | DIR_VIDEOS = get_tests_config()["dir_videos"] 7 | 8 | def get_videopath(fname): 9 | '''Returns the path of the given test video file.''' 10 | return os.path.join(DIR_VIDEOS, fname) -------------------------------------------------------------------------------- /src/tests/data/test_fetcher.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | 4 | from tests.data import fetcher 5 | 6 | 7 | class TestFetcher(unittest.TestCase): 8 | 9 | def test_get_videopath_success(self): 10 | path = fetcher.get_videopath(".content") 11 | self.assertTrue(os.path.exists(path)) 12 | with open(path, "r") as f: 13 | self.assertEqual("I_AM_VIDEO_TESTDATA_DIR", f.read().strip()) 14 | 15 | def test_get_videopath_failure(self): 16 | path = fetcher.get_videopath("bad_filename.mp4") 17 | self.assertFalse(os.path.exists(path)) 18 | 19 | 20 | if __name__ == '__main__': 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /src/tests/data/videos/.content: -------------------------------------------------------------------------------- 1 | I_AM_VIDEO_TESTDATA_DIR 2 | -------------------------------------------------------------------------------- /src/tests/data/videos/12727.mp4: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/data/videos/12727.mp4 -------------------------------------------------------------------------------- /src/tests/data/videos/12968.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/data/videos/12968.mp4 -------------------------------------------------------------------------------- /src/tests/env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/src/tests/env/__init__.py -------------------------------------------------------------------------------- /src/tests/env/test_config.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | 4 | from common.config import clear, get_config 5 | 6 | class TestConfig(unittest.TestCase): 7 | 8 | def setUp(self): 9 | clear() 10 | if 'V2D_CONFIG_FILE' in os.environ: 11 | del os.environ['V2D_CONFIG_FILE'] 12 | 13 | def test_json(self): 14 | self.assertTrue(get_config()) 15 | 16 | def test_json_docker(self): 17 | os.environ['V2D_CONFIG_FILE'] = 'config_docker.json' 18 | self.assertTrue(get_config()) 19 | 20 | def test_json_bad_file(self): 21 | os.environ['V2D_CONFIG_FILE'] = 'config_bad_filename.json' 22 | with self.assertRaises(IOError): 23 | get_config() 24 | 25 | if __name__ == '__main__': 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /src/tests/env/test_video_parsing.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import unittest 3 | import os 4 | 5 | from tests.data import fetcher 6 | 7 | 8 | class TestVideoParsing(unittest.TestCase): 9 | 10 | def test_opencv_videocapture(self): 11 | path = fetcher.get_videopath("12727.mp4") 12 | self.assertTrue(os.path.exists(path)) 13 | vcap = cv2.VideoCapture(path) 14 | success_count = 0 15 | while True: 16 | success, _ = vcap.read() 17 | if not success: 18 | break 19 | success_count += 1 20 | self.assertGreater(success_count, 3*15) # at least ~3 seconds of frames at 15 fps 21 | self.assertLess(success_count, 15*30) # at most ~15 seconds of frames at 30 fps 22 | 23 | 24 | if __name__ == '__main__': 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/tests/e2e/__init__.py -------------------------------------------------------------------------------- /tests/e2e/test_external.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | import os 4 | 5 | from parameterized import parameterized 6 | from selenium.webdriver import Firefox 7 | from selenium.webdriver.firefox.options import Options 8 | from selenium.webdriver.common.by import By 9 | from selenium.webdriver.support import expected_conditions as EC 10 | from selenium.webdriver.support.ui import WebDriverWait 11 | 12 | WEB_URL = "http://localhost:8080" 13 | ROOT_PATH = "src" 14 | CONFIG_FILE = "src/config.json" 15 | 16 | 17 | class TestExternal(unittest.TestCase): 18 | """Tests from the perspective of an end user.""" 19 | 20 | def
setUp(self): 21 | options = Options() 22 | options.add_argument('-headless') 23 | self.driver = Firefox(options=options) 24 | 25 | def tearDown(self): 26 | self.driver.close() 27 | 28 | def get_video_path(self, fname): 29 | with open(CONFIG_FILE, "r") as fin: 30 | dir_videos = json.load(fin)["tests"]["dir_videos"] 31 | path = os.path.abspath( 32 | os.path.join(ROOT_PATH, dir_videos, fname)) 33 | self.assertTrue(os.path.exists(path)) 34 | return path 35 | 36 | @parameterized.expand([ 37 | ("12727.mp4", "two men are talking about a cooking show"), 38 | ("12968.mp4", "a woman is talking about a makeup face"), 39 | ]) 40 | def test_upload_and_verify(self, fname, caption): 41 | """ 42 | Tests uploading a video and verifies the response. 43 | Note: The expected captions are currently hard-coded for a specific 44 | trained model and might fail for other models. 45 | """ 46 | self.driver.get(WEB_URL) 47 | video_path = self.get_video_path(fname) 48 | text_vprocessing = "Video is being uploaded and processed" 49 | self.driver.find_element_by_xpath("//input[@type='file']").send_keys(video_path) 50 | self.assertNotIn(text_vprocessing, 51 | self.driver.find_element_by_id("notifications").text) 52 | self.driver.find_element_by_xpath("//input[@value='Upload Video']").click() 53 | self.assertIn(text_vprocessing, 54 | self.driver.find_element_by_id("notifications").text) 55 | WebDriverWait(self.driver, 120).until( 56 | EC.text_to_be_present_in_element( 57 | (By.ID, "notifications"), "Request Completed") 58 | ) 59 | self.assertIn(caption, self.driver.find_element_by_id("results").text.lower()) 60 | -------------------------------------------------------------------------------- /uploads/touched: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scopeInfinity/Video2Description/538568b42c89973cc18431d15ce3e7748a81e302/uploads/touched --------------------------------------------------------------------------------
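Two short usage sketches follow; neither is a file in the repository. First, how the configuration layer in src/common/config.py selects its file: get_config() reads config.json from the current working directory unless the V2D_CONFIG_FILE environment variable points elsewhere, and caches the parsed dict on the function object until clear() is called. A minimal sketch, assuming it is run from the src/ directory:

import os

from common import config

# Point the loader at the Docker config before the first get_config() call;
# get_config() caches its result, so clear() forces a reload.
os.environ["V2D_CONFIG_FILE"] = "config_docker.json"
config.clear()
print(config.get_rpc_config()["RPC_PORT"])  # 5001 with the checked-in configs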
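Second, a minimal client sketch for the backend's XML-RPC interface registered by register_server in src/common/rpc.py, assuming a backend is already serving with the stock src/config.json values (endpoint 127.0.0.1, port 5001); the video path below is a placeholder:

from six.moves.xmlrpc_client import ServerProxy

# Endpoint and port mirror the "rpc" section of src/config.json.
proxy = ServerProxy("http://127.0.0.1:5001/")

# Methods registered by common.rpc.register_server:
print(proxy.get_weights_status())                         # model weights readiness
print(proxy.predict_fnames(["/path/to/some/video.mp4"]))  # captions for local files

The frontend's predict_fnames and predict_ids helpers in src/frontend/app.py reach the backend the same way, through the cached proxy returned by get_rpc().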