├── .gitignore
├── LICENSE
├── README.md
├── assets
    └── cfvqa.png
├── cfvqa
    ├── __init__.py
    ├── __version__.py
    ├── datasets
    │   ├── __init__.py
    │   ├── factory.py
    │   ├── scripts
    │   │   ├── download_vqa2.sh
    │   │   └── download_vqacp2.sh
    │   ├── vqa2.py
    │   ├── vqacp.py
    │   └── vqacp2.py
    ├── models
    │   ├── criterions
    │   │   ├── __init__.py
    │   │   ├── cfvqa_criterion.py
    │   │   ├── factory.py
    │   │   └── rubi_criterion.py
    │   ├── metrics
    │   │   ├── __init__.py
    │   │   ├── factory.py
    │   │   ├── vqa_cfvqa_metrics.py
    │   │   ├── vqa_cfvqasimple_metrics.py
    │   │   └── vqa_rubi_metrics.py
    │   └── networks
    │   │   ├── __init__.py
    │   │   ├── cfvqa.py
    │   │   ├── factory.py
    │   │   ├── rubi.py
    │   │   ├── san_net.py
    │   │   ├── smrl_net.py
    │   │   ├── updn_net.py
    │   │   └── utils.py
    ├── optimizers
    │   ├── __init__.py
    │   └── factory.py
    └── options
    │   ├── vqa2
    │       ├── smrl_baseline.yaml
    │       ├── smrl_cfvqa_hm.yaml
    │       ├── smrl_cfvqa_sum.yaml
    │       ├── smrl_cfvqasimple_hm.yaml
    │       ├── smrl_cfvqasimple_rubi.yaml
    │       ├── smrl_cfvqasimple_sum.yaml
    │       └── smrl_rubi.yaml
    │   └── vqacp2
    │       ├── smrl_baseline.yaml
    │       ├── smrl_cfvqa_hm.yaml
    │       ├── smrl_cfvqa_sum.yaml
    │       ├── smrl_cfvqasimple_hm.yaml
    │       ├── smrl_cfvqasimple_rubi.yaml
    │       ├── smrl_cfvqasimple_sum.yaml
    │       └── smrl_rubi.yaml
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | data/
132 | data
133 | logs/
134 | logs
135 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Counterfactual VQA (CF-VQA)
  2 | 
  3 | This repository is the Pytorch implementation of our paper ["Counterfactual VQA: A Cause-Effect Look at Language Bias"](https://arxiv.org/abs/2006.04315) in CVPR 2021. This code is implemented as a fork of [RUBi][1].
  4 | 
  5 | CF-VQA is proposed to capture and mitigate language bias in VQA from the view of causality. CF-VQA (1) captures the language bias as the direct causal effect of questions on answers, and (2) reduces the language bias by subtracting the direct language effect from the total causal effect.
  6 | 
  7 | <p align="center">
  8 |     <img src="assets/cfvqa.png" />
  9 | </p>
 10 | 
 11 | 
 12 | If you find this paper helps your research, please kindly consider citing our paper in your publications.
 13 | ```
 14 | @inproceedings{niu2020counterfactual,
 15 |   title={Counterfactual VQA: A Cause-Effect Look at Language Bias},
 16 |   author={Niu, Yulei and Tang, Kaihua and Zhang, Hanwang and Lu, Zhiwu and Hua, Xian-Sheng and Wen, Ji-Rong},
 17 |   booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
 18 |   year={2021}
 19 | }
 20 | ```
 21 | ## Summary
 22 | 
 23 | * [Installation](#installation)
 24 |     * [Setup and dependencies](#1-setup-and-dependencies)
 25 |     * [Download datasets](#2-download-datasets)
 26 | * [Quick start](#quick-start)
 27 |     * [Train a model](#train-a-model)
 28 |     * [Evaluate a model](#evaluate-a-model)
 29 | * [Useful commands](#useful-commands)
 30 | * [Acknowledgment](#acknowledgment)
 31 | 
 32 | ## Installation
 33 | 
 34 | 
 35 | ### 1. Setup and dependencies
 36 | 
 37 | Install the Anaconda or Miniconda distribution (Python 3) from its download site, then create and activate the environment:
 38 | 
 39 | ```bash
 40 | conda create --name cfvqa python=3.7
 41 | source activate cfvqa
 42 | pip install -r requirements.txt
 43 | ```
 44 | 
 45 | ### 2. Download datasets
 46 | 
 47 | Download annotations, images and features for VQA experiments:
 48 | ```bash
 49 | bash cfvqa/datasets/scripts/download_vqa2.sh
 50 | bash cfvqa/datasets/scripts/download_vqacp2.sh
 51 | ```
 52 | 
 53 | 
 54 | ## Quick start
 55 | 
 56 | 
 57 | ### Train a model
 58 | 
 59 | The [bootstrap/run.py](https://github.com/Cadene/bootstrap.pytorch/blob/master/bootstrap/run.py) file loads the options contained in a yaml file, creates the corresponding experiment directory and starts the training procedure. For instance, you can train our best model on VQA-CP v2 (CFVQA+SUM+SMRL) by running:
 60 | ```bash
 61 | python -m bootstrap.run -o cfvqa/options/vqacp2/smrl_cfvqa_sum.yaml
 62 | ```
 63 | Then, several files are going to be created in `logs/vqacp2/smrl_cfvqa_sum/`:
 64 | - [options.yaml] (copy of options)
 65 | - [logs.txt] (history of print)
 66 | - [logs.json] (batches and epochs statistics)
 67 | - **[\_vq\_val\_oe.json] (statistics for the language-prior based strategy, e.g., RUBi)**
 68 | - **[\_cfvqa\_val\_oe.json] (statistics for CF-VQA)**
 69 | - [\_q\_val\_oe.json] (statistics for language-only branch)
 70 | - [\_v\_val\_oe.json] (statistics for vision-only branch)
 71 | - [\_all\_val\_oe.json] (statistics for the ensembled branch)
 72 | - ckpt_last_engine.pth.tar (checkpoints of last epoch)
 73 | - ckpt_last_model.pth.tar
 74 | - ckpt_last_optimizer.pth.tar
 75 | 
 76 | Many options are available in the options directory. In the option file names, `cfvqa` denotes the complete causal graph while `cfvqasimple` denotes the simplified causal graph.
 77 | 
 78 | ### Evaluate a model
 79 | 
 80 | There is no test set on VQA-CP v2, our main dataset, so the evaluation is done on the validation set. For a model trained on VQA v2, you can evaluate your model on the test set. In this example, [bootstrap/run.py](https://github.com/Cadene/bootstrap.pytorch/blob/master/bootstrap/run.py) loads the options from your experiment directory, resumes the last checkpoint and starts an evaluation on the validation set while skipping the training set (`train_split` is empty). Thanks to `--misc.logs_name test`, the logs will be written in the new `logs_test.txt` and `logs_test.json` files, instead of being appended to the `logs.txt` and `logs.json` files.
 81 | ```bash
 82 | python -m bootstrap.run \
 83 | -o ./logs/vqacp2/smrl_cfvqa_sum/options.yaml \
 84 | --exp.resume last \
 85 | --dataset.train_split '' \
 86 | --dataset.eval_split val \
 87 | --misc.logs_name test 
 88 | ```
 89 | 
 90 | ## Useful commands
 91 | 
 92 | 
 93 | ### Use a specific GPU
 94 | 
 95 | For a specific experiment:
 96 | ```bash
 97 | CUDA_VISIBLE_DEVICES=0 python -m bootstrap.run -o cfvqa/options/vqacp2/smrl_cfvqa_sum.yaml
 98 | ```
 99 | 
100 | For the current terminal session:
101 | ```bash
102 | export CUDA_VISIBLE_DEVICES=0
103 | ```
104 | 
105 | ### Overwrite an option
106 | 
107 | The bootstrap.pytorch framework makes it easy to overwrite a hyperparameter. In this example, we run an experiment with a non-default learning rate. Thus, we also overwrite the experiment directory path:
108 | ```bash
109 | python -m bootstrap.run -o cfvqa/options/vqacp2/smrl_cfvqa_sum.yaml \
110 | --optimizer.lr 0.0003 \
111 | --exp.dir logs/vqacp2/smrl_cfvqa_sum_lr,0.0003
112 | ```
113 | 
114 | ### Resume training
115 | 
116 | If a problem occurs, it is easy to resume from the last epoch by specifying the options file from the experiment directory while overwriting the `exp.resume` option (default is None):
117 | ```bash
118 | python -m bootstrap.run -o logs/vqacp2/smrl_cfvqa_sum/options.yaml \
119 | --exp.resume last
120 | ```
121 | 
122 | 
123 | ## Acknowledgment
124 | 
125 | Special thanks to the authors of [RUBi][1], [BLOCK][2], and [bootstrap.pytorch][3], and the datasets used in this research project.
126 | 
127 | 
128 | [1]: https://github.com/cdancette/rubi.bootstrap.pytorch
129 | [2]: https://github.com/Cadene/block.bootstrap.pytorch
130 | [3]: https://github.com/Cadene/bootstrap.pytorch
131 | 


--------------------------------------------------------------------------------
/assets/cfvqa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuleiniu/cfvqa/736795bef50db9c76818f9a08202c7f325489afd/assets/cfvqa.png


--------------------------------------------------------------------------------
/cfvqa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuleiniu/cfvqa/736795bef50db9c76818f9a08202c7f325489afd/cfvqa/__init__.py


--------------------------------------------------------------------------------
/cfvqa/__version__.py:
--------------------------------------------------------------------------------
# Version string of the cfvqa package.
__version__ = '0.0.0'
2 | 


--------------------------------------------------------------------------------
/cfvqa/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuleiniu/cfvqa/736795bef50db9c76818f9a08202c7f325489afd/cfvqa/datasets/__init__.py


--------------------------------------------------------------------------------
/cfvqa/datasets/factory.py:
--------------------------------------------------------------------------------
  1 | from bootstrap.lib.options import Options
  2 | from block.datasets.tdiuc import TDIUC
  3 | from block.datasets.vrd import VRD
  4 | from block.datasets.vg import VG
  5 | from block.datasets.vqa_utils import ListVQADatasets
  6 | from .vqa2 import VQA2
  7 | from .vqacp2 import VQACP2
  8 | from .vqacp import VQACP
  9 | 
 10 | def factory(engine=None):
 11 |     opt = Options()['dataset']
 12 | 
 13 |     dataset = {}
 14 |     if opt.get('train_split', None):
 15 |         dataset['train'] = factory_split(opt['train_split'])
 16 |     if opt.get('eval_split', None):
 17 |         dataset['eval'] = factory_split(opt['eval_split'])
 18 | 
 19 |     return dataset
 20 | 
 21 | def factory_split(split):
 22 |     opt = Options()['dataset']
 23 |     shuffle = ('train' in split)
 24 | 
 25 |     if opt['name'] == 'vqacp2':
 26 |         assert(split in ['train', 'val', 'test'])
 27 |         samplingans = (opt['samplingans'] and split == 'train')
 28 | 
 29 |         dataset = VQACP2(
 30 |             dir_data=opt['dir'],
 31 |             split=split,
 32 |             batch_size=opt['batch_size'],
 33 |             nb_threads=opt['nb_threads'],
 34 |             pin_memory=Options()['misc']['cuda'],
 35 |             shuffle=shuffle,
 36 |             nans=opt['nans'],
 37 |             minwcount=opt['minwcount'],
 38 |             nlp=opt['nlp'],
 39 |             proc_split=opt['proc_split'],
 40 |             samplingans=samplingans,
 41 |             dir_rcnn=opt['dir_rcnn'],
 42 |             dir_cnn=opt.get('dir_cnn', None),
 43 |             dir_vgg16=opt.get('dir_vgg16', None),
 44 |             )
 45 |     elif opt['name'] == 'vqacp':
 46 |         assert(split in ['train', 'val', 'test'])
 47 |         samplingans = (opt['samplingans'] and split == 'train')
 48 | 
 49 |         dataset = VQACP(
 50 |             dir_data=opt['dir'],
 51 |             split=split,
 52 |             batch_size=opt['batch_size'],
 53 |             nb_threads=opt['nb_threads'],
 54 |             pin_memory=Options()['misc']['cuda'],
 55 |             shuffle=shuffle,
 56 |             nans=opt['nans'],
 57 |             minwcount=opt['minwcount'],
 58 |             nlp=opt['nlp'],
 59 |             proc_split=opt['proc_split'],
 60 |             samplingans=samplingans,
 61 |             dir_rcnn=opt['dir_rcnn'],
 62 |             dir_cnn=opt.get('dir_cnn', None),
 63 |             dir_vgg16=opt.get('dir_vgg16', None),
 64 |             )
 65 | 
 66 |     elif opt['name'] == 'vqacpv2-with-testdev':
 67 |         assert(split in ['train', 'val', 'test'])
 68 |         samplingans = (opt['samplingans'] and split == 'train')
 69 |         dataset = VQACP2(
 70 |             dir_data=opt['dir'],
 71 |             split=split,
 72 |             batch_size=opt['batch_size'],
 73 |             nb_threads=opt['nb_threads'],
 74 |             pin_memory=Options()['misc']['cuda'],
 75 |             shuffle=shuffle,
 76 |             nans=opt['nans'],
 77 |             minwcount=opt['minwcount'],
 78 |             nlp=opt['nlp'],
 79 |             proc_split=opt['proc_split'],
 80 |             samplingans=samplingans,
 81 |             dir_rcnn=opt['dir_rcnn'],
 82 |             dir_cnn=opt.get('dir_cnn', None),
 83 |             dir_vgg16=opt.get('dir_vgg16', None),
 84 |             has_testdevset=True,
 85 |             )
 86 | 
 87 |     elif opt['name'] == 'vqa2':
 88 |         assert(split in ['train', 'val', 'test'])
 89 |         samplingans = (opt['samplingans'] and split == 'train')
 90 | 
 91 |         if opt['vg']:
 92 |             assert(opt['proc_split'] == 'trainval')
 93 | 
 94 |             # trainvalset 
 95 |             vqa2 = VQA2(
 96 |                 dir_data=opt['dir'],
 97 |                 split='train',
 98 |                 nans=opt['nans'],
 99 |                 minwcount=opt['minwcount'],
100 |                 nlp=opt['nlp'],
101 |                 proc_split=opt['proc_split'],
102 |                 samplingans=samplingans,
103 |                 dir_rcnn=opt['dir_rcnn'])
104 | 
105 |             vg = VG(
106 |                 dir_data=opt['dir_vg'],
107 |                 split='train',
108 |                 nans=10000,
109 |                 minwcount=0,
110 |                 nlp=opt['nlp'],
111 |                 dir_rcnn=opt['dir_rcnn_vg'])
112 | 
113 |             vqa2vg = ListVQADatasets(
114 |                 [vqa2,vg],
115 |                 split='train',
116 |                 batch_size=opt['batch_size'],
117 |                 nb_threads=opt['nb_threads'],
118 |                 pin_memory=Options()['misc.cuda'],
119 |                 shuffle=shuffle)
120 | 
121 |             if split == 'train':
122 |                 dataset = vqa2vg
123 |             else:
124 |                 dataset = VQA2(
125 |                     dir_data=opt['dir'],
126 |                     split=split,
127 |                     batch_size=opt['batch_size'],
128 |                     nb_threads=opt['nb_threads'],
129 |                     pin_memory=Options()['misc.cuda'],
130 |                     shuffle=False,
131 |                     nans=opt['nans'],
132 |                     minwcount=opt['minwcount'],
133 |                     nlp=opt['nlp'],
134 |                     proc_split=opt['proc_split'],
135 |                     samplingans=samplingans,
136 |                     dir_rcnn=opt['dir_rcnn'])
137 |                 dataset.sync_from(vqa2vg)
138 | 
139 |         else:
140 |             dataset = VQA2(
141 |                 dir_data=opt['dir'],
142 |                 split=split,
143 |                 batch_size=opt['batch_size'],
144 |                 nb_threads=opt['nb_threads'],
145 |                 pin_memory=Options()['misc.cuda'],
146 |                 shuffle=shuffle,
147 |                 nans=opt['nans'],
148 |                 minwcount=opt['minwcount'],
149 |                 nlp=opt['nlp'],
150 |                 proc_split=opt['proc_split'],
151 |                 samplingans=samplingans,
152 |                 dir_rcnn=opt['dir_rcnn'],
153 |                 dir_cnn=opt.get('dir_cnn', None),
154 |                 )
155 | 
156 |     return dataset
157 | 


--------------------------------------------------------------------------------
/cfvqa/datasets/scripts/download_vqa2.sh:
--------------------------------------------------------------------------------
 1 | mkdir -p data/vqa
 2 | cd data/vqa
 3 | wget http://data.lip6.fr/cadene/block/vqa2.tar.gz
 4 | wget http://data.lip6.fr/cadene/block/coco.tar.gz
 5 | tar -xzvf vqa2.tar.gz
 6 | tar -xzvf coco.tar.gz
 7 | 
 8 | mkdir -p data/vqa/coco/extract_rcnn
 9 | cd data/vqa/coco/extract_rcnn
10 | wget http://data.lip6.fr/cadene/block/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36.tar
11 | tar -xvf 2018-04-27_bottom-up-attention_fixed_36.tar
12 | 


--------------------------------------------------------------------------------
/cfvqa/datasets/scripts/download_vqacp2.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Download and unpack the vqacp2 archive into data/vqa.
# Run from the repository root.
set -e  # abort on the first failure so wget/tar never run in the wrong directory

mkdir -p data/vqa
cd data/vqa
wget http://data.lip6.fr/cadene/murel/vqacp2.tar.gz
tar -xzvf vqacp2.tar.gz
5 | 


--------------------------------------------------------------------------------
/cfvqa/datasets/vqa2.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import csv
  3 | import copy
  4 | import json
  5 | import torch
  6 | import numpy as np
  7 | from os import path as osp
  8 | from bootstrap.lib.logger import Logger
  9 | from bootstrap.lib.options import Options
 10 | from block.datasets.vqa_utils import AbstractVQA
 11 | from copy import deepcopy
 12 | import random
 13 | import tqdm
 14 | import h5py
 15 | 
 16 | class VQA2(AbstractVQA):
 17 | 
 18 |     def __init__(self,
 19 |             dir_data='data/vqa2',
 20 |             split='train', 
 21 |             batch_size=10,
 22 |             nb_threads=4,
 23 |             pin_memory=False,
 24 |             shuffle=False,
 25 |             nans=1000,
 26 |             minwcount=10,
 27 |             nlp='mcb',
 28 |             proc_split='train',
 29 |             samplingans=False,
 30 |             dir_rcnn='data/coco/extract_rcnn',
 31 |             adversarial=False,
 32 |             dir_cnn=None
 33 |             ):
 34 | 
 35 |         super(VQA2, self).__init__(
 36 |             dir_data=dir_data,
 37 |             split=split,
 38 |             batch_size=batch_size,
 39 |             nb_threads=nb_threads,
 40 |             pin_memory=pin_memory,
 41 |             shuffle=shuffle,
 42 |             nans=nans,
 43 |             minwcount=minwcount,
 44 |             nlp=nlp,
 45 |             proc_split=proc_split,
 46 |             samplingans=samplingans,
 47 |             has_valset=True,
 48 |             has_testset=True,
 49 |             has_answers_occurence=True,
 50 |             do_tokenize_answers=False)            
 51 | 
 52 |         self.dir_rcnn = dir_rcnn
 53 |         self.dir_cnn = dir_cnn
 54 |         self.load_image_features()
 55 |         # to activate manually in visualization context (notebo# to activate manually in visualization context (notebook)
 56 |         self.load_original_annotation = False
 57 | 
 58 |     def add_rcnn_to_item(self, item):
 59 |         path_rcnn = os.path.join(self.dir_rcnn, '{}.pth'.format(item['image_name']))
 60 |         item_rcnn = torch.load(path_rcnn)
 61 |         item['visual'] = item_rcnn['pooled_feat']
 62 |         item['coord'] = item_rcnn['rois']
 63 |         item['norm_coord'] = item_rcnn.get('norm_rois', None)
 64 |         item['nb_regions'] = item['visual'].size(0)
 65 |         return item
 66 | 
 67 |     def add_cnn_to_item(self, item):
 68 |         image_name = item['image_name']
 69 |         if image_name in self.image_names_to_index_train:
 70 |             index = self.image_names_to_index_train[image_name]
 71 |             image = torch.tensor(self.image_features_train['att'][index])
 72 |         elif image_name in self.image_names_to_index_val:
 73 |             index = self.image_names_to_index_val[image_name]
 74 |             image = torch.tensor(self.image_features_val['att'][index])
 75 |         image = image.permute(1, 2, 0).view(196, 2048)
 76 |         item['visual'] = image
 77 |         return item
 78 | 
 79 |     def load_image_features(self):
 80 |         if self.dir_cnn:
 81 |             filename_train = os.path.join(self.dir_cnn, 'trainset.hdf5')
 82 |             filename_val = os.path.join(self.dir_cnn, 'valset.hdf5')
 83 |             Logger()(f"Opening file {filename_train}, {filename_val}")
 84 |             self.image_features_train = h5py.File(filename_train, 'r', swmr=True)
 85 |             self.image_features_val = h5py.File(filename_val, 'r', swmr=True)
 86 |             # load txt
 87 |             with open(os.path.join(self.dir_cnn, 'trainset.txt'.format(self.split)), 'r') as f:
 88 |                 self.image_names_to_index_train = {}
 89 |                 for i, line in enumerate(f):
 90 |                     self.image_names_to_index_train[line.strip()] = i
 91 |             with open(os.path.join(self.dir_cnn, 'valset.txt'.format(self.split)), 'r') as f:
 92 |                 self.image_names_to_index_val = {}
 93 |                 for i, line in enumerate(f):
 94 |                     self.image_names_to_index_val[line.strip()] = i
 95 | 
 96 |     def __getitem__(self, index):
 97 |         item = {}
 98 |         item['index'] = index
 99 | 
100 |         # Process Question (word token)
101 |         question = self.dataset['questions'][index]
102 |         if self.load_original_annotation:
103 |             item['original_question'] = question
104 | 
105 |         item['question_id'] = question['question_id']
106 | 
107 |         item['question'] = torch.tensor(question['question_wids'], dtype=torch.long)
108 |         item['lengths'] = torch.tensor([len(question['question_wids'])], dtype=torch.long)
109 |         item['image_name'] = question['image_name']
110 | 
111 |         # Process Object, Attribut and Relational features
112 |         # Process Object, Attribut and Relational features
113 |         if self.dir_rcnn:
114 |             item = self.add_rcnn_to_item(item)
115 |         elif self.dir_cnn:
116 |             item = self.add_cnn_to_item(item)
117 | 
118 |         # Process Answer if exists
119 |         if 'annotations' in self.dataset:
120 |             annotation = self.dataset['annotations'][index]
121 |             if self.load_original_annotation:
122 |                 item['original_annotation'] = annotation
123 |             if 'train' in self.split and self.samplingans:
124 |                 proba = annotation['answers_count']
125 |                 proba = proba / np.sum(proba)
126 |                 item['answer_id'] = int(np.random.choice(annotation['answers_id'], p=proba))
127 |             else:
128 |                 item['answer_id'] = annotation['answer_id']
129 |             item['class_id'] = torch.tensor([item['answer_id']], dtype=torch.long)
130 |             item['answer'] = annotation['answer']
131 |             item['question_type'] = annotation['question_type']
132 |         else:
133 |             if item['question_id'] in self.is_qid_testdev:
134 |                 item['is_testdev'] = True
135 |             else:
136 |                 item['is_testdev'] = False
137 | 
138 |         # if Options()['model.network.name'] == 'xmn_net':
139 |         #     num_feat = 36
140 |         #     relation_mask = np.zeros((num_feat, num_feat))
141 |         #     boxes = item['coord']
142 |         #     for i in range(num_feat):
143 |         #         for j in range(i+1, num_feat):
144 |         #             # if there is no overlap between two bounding box
145 |         #             if boxes[0,i]>boxes[2,j] or boxes[0,j]>boxes[2,i] or boxes[1,i]>boxes[3,j] or boxes[1,j]>boxes[3,i]:
146 |         #                 pass
147 |         #             else:
148 |         #                 relation_mask[i,j] = relation_mask[j,i] = 1
149 |         #     relation_mask = torch.from_numpy(relation_mask).byte()
150 |         #     item['relation_mask'] = relation_mask
151 | 
152 |         return item
153 | 
154 |     def download(self):
155 |         dir_zip = osp.join(self.dir_raw, 'zip')
156 |         os.system('mkdir -p '+dir_zip)
157 |         dir_ann = osp.join(self.dir_raw, 'annotations')
158 |         os.system('mkdir -p '+dir_ann)
159 |         os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Questions_Train_mscoco.zip -P '+dir_zip)
160 |         os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Questions_Val_mscoco.zip -P '+dir_zip)
161 |         os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Questions_Test_mscoco.zip -P '+dir_zip)
162 |         os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Annotations_Train_mscoco.zip -P '+dir_zip)
163 |         os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Annotations_Val_mscoco.zip -P '+dir_zip)
164 |         os.system('unzip '+osp.join(dir_zip, 'v2_Questions_Train_mscoco.zip')+' -d '+dir_ann)
165 |         os.system('unzip '+osp.join(dir_zip, 'v2_Questions_Val_mscoco.zip')+' -d '+dir_ann)
166 |         os.system('unzip '+osp.join(dir_zip, 'v2_Questions_Test_mscoco.zip')+' -d '+dir_ann)
167 |         os.system('unzip '+osp.join(dir_zip, 'v2_Annotations_Train_mscoco.zip')+' -d '+dir_ann)
168 |         os.system('unzip '+osp.join(dir_zip, 'v2_Annotations_Val_mscoco.zip')+' -d '+dir_ann)
169 |         os.system('mv '+osp.join(dir_ann, 'v2_mscoco_train2014_annotations.json')+' '
170 |                        +osp.join(dir_ann, 'mscoco_train2014_annotations.json'))
171 |         os.system('mv '+osp.join(dir_ann, 'v2_mscoco_val2014_annotations.json')+' '
172 |                        +osp.join(dir_ann, 'mscoco_val2014_annotations.json'))
173 |         os.system('mv '+osp.join(dir_ann, 'v2_OpenEnded_mscoco_train2014_questions.json')+' '
174 |                        +osp.join(dir_ann, 'OpenEnded_mscoco_train2014_questions.json'))
175 |         os.system('mv '+osp.join(dir_ann, 'v2_OpenEnded_mscoco_val2014_questions.json')+' '
176 |                        +osp.join(dir_ann, 'OpenEnded_mscoco_val2014_questions.json'))
177 |         os.system('mv '+osp.join(dir_ann, 'v2_OpenEnded_mscoco_test2015_questions.json')+' '
178 |                        +osp.join(dir_ann, 'OpenEnded_mscoco_test2015_questions.json'))
179 |         os.system('mv '+osp.join(dir_ann, 'v2_OpenEnded_mscoco_test-dev2015_questions.json')+' '
180 |                        +osp.join(dir_ann, 'OpenEnded_mscoco_test-dev2015_questions.json'))
181 | 


--------------------------------------------------------------------------------
/cfvqa/datasets/vqacp.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import csv
  3 | import copy
  4 | import json
  5 | import torch
  6 | import numpy as np
  7 | from tqdm import tqdm
  8 | from os import path as osp
  9 | from bootstrap.lib.logger import Logger
 10 | from block.datasets.vqa_utils import AbstractVQA
 11 | from copy import deepcopy
 12 | import random
 13 | import h5py
 14 | 
 15 | class VQACP(AbstractVQA):
 16 | 
 17 |     def __init__(self,
 18 |             dir_data='data/vqa/vqacp2',
 19 |             split='train',
 20 |             batch_size=80,
 21 |             nb_threads=4,
 22 |             pin_memory=False,
 23 |             shuffle=False,
 24 |             nans=1000,
 25 |             minwcount=10,
 26 |             nlp='mcb',
 27 |             proc_split='train',
 28 |             samplingans=False,
 29 |             dir_rcnn='data/coco/extract_rcnn',
 30 |             dir_cnn=None,
 31 |             dir_vgg16=None,
 32 |             has_testdevset=False,
 33 |             ):
 34 |         super(VQACP, self).__init__(
 35 |             dir_data=dir_data,
 36 |             split=split,
 37 |             batch_size=batch_size,
 38 |             nb_threads=nb_threads,
 39 |             pin_memory=pin_memory,
 40 |             shuffle=shuffle,
 41 |             nans=nans,
 42 |             minwcount=minwcount,
 43 |             nlp=nlp,
 44 |             proc_split=proc_split,
 45 |             samplingans=samplingans,
 46 |             has_valset=True,
 47 |             has_testset=False,
 48 |             has_testdevset=has_testdevset,
 49 |             has_testset_anno=False,
 50 |             has_answers_occurence=True,
 51 |             do_tokenize_answers=False)
 52 |         self.dir_rcnn = dir_rcnn
 53 |         self.dir_cnn = dir_cnn
 54 |         self.dir_vgg16 = dir_vgg16
 55 |         self.load_image_features()
 56 |         self.load_original_annotation = False
 57 | 
 58 |     def add_rcnn_to_item(self, item):
 59 |         path_rcnn = os.path.join(self.dir_rcnn, '{}.pth'.format(item['image_name']))
 60 |         item_rcnn = torch.load(path_rcnn)
 61 |         item['visual'] = item_rcnn['pooled_feat']
 62 |         item['coord'] = item_rcnn['rois']
 63 |         item['norm_coord'] = item_rcnn['norm_rois']
 64 |         item['nb_regions'] = item['visual'].size(0)
 65 |         return item
 66 | 
 67 |     def load_image_features(self):
 68 |         if self.dir_cnn:
 69 |             filename_train = os.path.join(self.dir_cnn, 'trainset.hdf5')
 70 |             filename_val = os.path.join(self.dir_cnn, 'valset.hdf5')
 71 |             Logger()(f"Opening file {filename_train}, {filename_val}")
 72 |             self.image_features_train = h5py.File(filename_train, 'r', swmr=True)
 73 |             self.image_features_val = h5py.File(filename_val, 'r', swmr=True)
 74 |             # load txt
 75 |             with open(os.path.join(self.dir_cnn, 'trainset.txt'.format(self.split)), 'r') as f:
 76 |                 self.image_names_to_index_train = {}
 77 |                 for i, line in enumerate(f):
 78 |                     self.image_names_to_index_train[line.strip()] = i
 79 |             with open(os.path.join(self.dir_cnn, 'valset.txt'.format(self.split)), 'r') as f:
 80 |                 self.image_names_to_index_val = {}
 81 |                 for i, line in enumerate(f):
 82 |                     self.image_names_to_index_val[line.strip()] = i
 83 |         elif self.dir_vgg16:
 84 |             # list filenames
 85 |             self.filenames_train = os.listdir(os.path.join(self.dir_vgg16, 'train'))
 86 |             self.filenames_val = os.listdir(os.path.join(self.dir_vgg16, 'val'))
 87 | 
 88 | 
 89 |     def add_vgg_to_item(self, item):
 90 |         image_name = item['image_name']
 91 |         filename = image_name + '.pth'
 92 |         if filename in self.filenames_train:
 93 |             path = os.path.join(self.dir_vgg16, 'train', filename)
 94 |         elif filename in self.filenames_val:
 95 |             path = os.path.join(self.dir_vgg16, 'val', filename)
 96 |         visual = torch.load(path)
 97 |         visual = visual.permute(1, 2, 0).view(14*14, 512)
 98 |         item['visual'] = visual
 99 |         return item
100 | 
101 |     def add_cnn_to_item(self, item):
102 |         image_name = item['image_name']
103 |         if image_name in self.image_names_to_index_train:
104 |             index = self.image_names_to_index_train[image_name]
105 |             image = torch.tensor(self.image_features_train['att'][index])
106 |         elif image_name in self.image_names_to_index_val:
107 |             index = self.image_names_to_index_val[image_name]
108 |             image = torch.tensor(self.image_features_val['att'][index])
109 |         image = image.permute(1, 2, 0).view(196, 2048)
110 |         item['visual'] = image
111 |         return item
112 | 
113 |     def __getitem__(self, index):
114 |         item = {}
115 |         item['index'] = index
116 | 
117 |         # Process Question (word token)
118 |         question = self.dataset['questions'][index]
119 |         if self.load_original_annotation:
120 |             item['original_question'] = question
121 |         item['question_id'] = question['question_id']
122 |         item['question'] = torch.LongTensor(question['question_wids'])
123 |         item['lengths'] = torch.LongTensor([len(question['question_wids'])])
124 |         item['image_name'] = question['image_name']
125 | 
126 |         # Process Object, Attribut and Relational features
127 |         if self.dir_rcnn:
128 |             item = self.add_rcnn_to_item(item)
129 |         elif self.dir_cnn:
130 |             item = self.add_cnn_to_item(item)
131 |         elif self.dir_vgg16:
132 |             item = self.add_vgg_to_item(item)
133 | 
134 |         # Process Answer if exists
135 |         if 'annotations' in self.dataset:
136 |             annotation = self.dataset['annotations'][index]
137 |             if self.load_original_annotation:
138 |                 item['original_annotation'] = annotation
139 |             if 'train' in self.split and self.samplingans:
140 |                 proba = annotation['answers_count']
141 |                 proba = proba / np.sum(proba)
142 |                 item['answer_id'] = int(np.random.choice(annotation['answers_id'], p=proba))
143 |             else:
144 |                 item['answer_id'] = annotation['answer_id']
145 |             item['class_id'] = torch.LongTensor([item['answer_id']])
146 |             item['answer'] = annotation['answer']
147 |             item['question_type'] = annotation['question_type']
148 | 
149 |         return item
150 | 
151 |     def download(self):
152 |         dir_ann = osp.join(self.dir_raw, 'annotations')
153 |         os.system('mkdir -p '+dir_ann)
154 |         os.system('wget https://computing.ece.vt.edu/~aish/vqacp/vqacp_v1_train_questions.json -P' + dir_ann)
155 |         os.system('wget https://computing.ece.vt.edu/~aish/vqacp/vqacp_v1_test_questions.json -P' + dir_ann)
156 |         os.system('wget https://computing.ece.vt.edu/~aish/vqacp/vqacp_v1_train_annotations.json -P' + dir_ann)
157 |         os.system('wget https://computing.ece.vt.edu/~aish/vqacp/vqacp_v1_test_annotations.json -P' + dir_ann)
158 |         train_q = {"questions":json.load(open(osp.join(dir_ann, "vqacp_v1_train_questions.json")))}
159 |         val_q = {"questions":json.load(open(osp.join(dir_ann, "vqacp_v1_test_questions.json")))}
160 |         train_ann = {"annotations":json.load(open(osp.join(dir_ann, "vqacp_v1_train_annotations.json")))}
161 |         val_ann = {"annotations":json.load(open(osp.join(dir_ann, "vqacp_v1_test_annotations.json")))}
162 |         train_q['info'] = {}
163 |         train_q['data_type'] = 'mscoco'
164 |         train_q['data_subtype'] = "train2014cp"
165 |         train_q['task_type'] = "Open-Ended"
166 |         train_q['license'] = {}
167 |         val_q['info'] = {}
168 |         val_q['data_type'] = 'mscoco'
169 |         val_q['data_subtype'] = "val2014cp"
170 |         val_q['task_type'] = "Open-Ended"
171 |         val_q['license'] = {}
172 |         for k in ["info", 'data_type','data_subtype', 'license']:
173 |             train_ann[k] = train_q[k]
174 |             val_ann[k] = val_q[k]
175 |         with open(osp.join(dir_ann, "OpenEnded_mscoco_train2014_questions.json"), 'w') as F:
176 |             F.write(json.dumps(train_q))
177 |         with open(osp.join(dir_ann, "OpenEnded_mscoco_val2014_questions.json"), 'w') as F:
178 |             F.write(json.dumps(val_q))
179 |         with open(osp.join(dir_ann, "mscoco_train2014_annotations.json"), 'w') as F:
180 |             F.write(json.dumps(train_ann))
181 |         with open(osp.join(dir_ann, "mscoco_val2014_annotations.json"), 'w') as F:
182 |             F.write(json.dumps(val_ann))
183 | 
184 |     def add_image_names(self, dataset):
185 |         for q in dataset['questions']:
186 |             q['image_name'] = 'COCO_%s_%012d.jpg'%(q['coco_split'],q['image_id'])
187 |         return dataset
188 | 
189 | 


--------------------------------------------------------------------------------
/cfvqa/datasets/vqacp2.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import csv
  3 | import copy
  4 | import json
  5 | import torch
  6 | import numpy as np
  7 | from tqdm import tqdm
  8 | from os import path as osp
  9 | from bootstrap.lib.logger import Logger
 10 | from block.datasets.vqa_utils import AbstractVQA
 11 | from copy import deepcopy
 12 | import random
 13 | import h5py
 14 | 
 15 | class VQACP2(AbstractVQA):
 16 | 
 17 |     def __init__(self,
 18 |             dir_data='data/vqa/vqacp2',
 19 |             split='train',
 20 |             batch_size=80,
 21 |             nb_threads=4,
 22 |             pin_memory=False,
 23 |             shuffle=False,
 24 |             nans=1000,
 25 |             minwcount=10,
 26 |             nlp='mcb',
 27 |             proc_split='train',
 28 |             samplingans=False,
 29 |             dir_rcnn='data/coco/extract_rcnn',
 30 |             dir_cnn=None,
 31 |             dir_vgg16=None,
 32 |             has_testdevset=False,
 33 |             ):
 34 |         super(VQACP2, self).__init__(
 35 |             dir_data=dir_data,
 36 |             split=split,
 37 |             batch_size=batch_size,
 38 |             nb_threads=nb_threads,
 39 |             pin_memory=pin_memory,
 40 |             shuffle=shuffle,
 41 |             nans=nans,
 42 |             minwcount=minwcount,
 43 |             nlp=nlp,
 44 |             proc_split=proc_split,
 45 |             samplingans=samplingans,
 46 |             has_valset=True,
 47 |             has_testset=False,
 48 |             has_testdevset=has_testdevset,
 49 |             has_testset_anno=False,
 50 |             has_answers_occurence=True,
 51 |             do_tokenize_answers=False)
 52 |         self.dir_rcnn = dir_rcnn
 53 |         self.dir_cnn = dir_cnn
 54 |         self.dir_vgg16 = dir_vgg16
 55 |         self.load_image_features()
 56 |         self.load_original_annotation = False
 57 | 
 58 |     def add_rcnn_to_item(self, item):
 59 |         path_rcnn = os.path.join(self.dir_rcnn, '{}.pth'.format(item['image_name']))
 60 |         item_rcnn = torch.load(path_rcnn)
 61 |         item['visual'] = item_rcnn['pooled_feat']
 62 |         item['coord'] = item_rcnn['rois']
 63 |         item['norm_coord'] = item_rcnn['norm_rois']
 64 |         item['nb_regions'] = item['visual'].size(0)
 65 |         return item
 66 | 
 67 |     def load_image_features(self):
 68 |         if self.dir_cnn:
 69 |             filename_train = os.path.join(self.dir_cnn, 'trainset.hdf5')
 70 |             filename_val = os.path.join(self.dir_cnn, 'valset.hdf5')
 71 |             Logger()(f"Opening file {filename_train}, {filename_val}")
 72 |             self.image_features_train = h5py.File(filename_train, 'r', swmr=True)
 73 |             self.image_features_val = h5py.File(filename_val, 'r', swmr=True)
 74 |             # load txt
 75 |             with open(os.path.join(self.dir_cnn, 'trainset.txt'.format(self.split)), 'r') as f:
 76 |                 self.image_names_to_index_train = {}
 77 |                 for i, line in enumerate(f):
 78 |                     self.image_names_to_index_train[line.strip()] = i
 79 |             with open(os.path.join(self.dir_cnn, 'valset.txt'.format(self.split)), 'r') as f:
 80 |                 self.image_names_to_index_val = {}
 81 |                 for i, line in enumerate(f):
 82 |                     self.image_names_to_index_val[line.strip()] = i
 83 |         elif self.dir_vgg16:
 84 |             # list filenames
 85 |             self.filenames_train = os.listdir(os.path.join(self.dir_vgg16, 'train'))
 86 |             self.filenames_val = os.listdir(os.path.join(self.dir_vgg16, 'val'))
 87 | 
 88 | 
 89 |     def add_vgg_to_item(self, item):
 90 |         image_name = item['image_name']
 91 |         filename = image_name + '.pth'
 92 |         if filename in self.filenames_train:
 93 |             path = os.path.join(self.dir_vgg16, 'train', filename)
 94 |         elif filename in self.filenames_val:
 95 |             path = os.path.join(self.dir_vgg16, 'val', filename)
 96 |         visual = torch.load(path)
 97 |         visual = visual.permute(1, 2, 0).view(14*14, 512)
 98 |         item['visual'] = visual
 99 |         return item
100 | 
101 |     def add_cnn_to_item(self, item):
102 |         image_name = item['image_name']
103 |         if image_name in self.image_names_to_index_train:
104 |             index = self.image_names_to_index_train[image_name]
105 |             image = torch.tensor(self.image_features_train['att'][index])
106 |         elif image_name in self.image_names_to_index_val:
107 |             index = self.image_names_to_index_val[image_name]
108 |             image = torch.tensor(self.image_features_val['att'][index])
109 |         image = image.permute(1, 2, 0).view(196, 2048)
110 |         item['visual'] = image
111 |         return item
112 | 
113 |     def __getitem__(self, index):
114 |         item = {}
115 |         item['index'] = index
116 | 
117 |         # Process Question (word token)
118 |         question = self.dataset['questions'][index]
119 |         if self.load_original_annotation:
120 |             item['original_question'] = question
121 |         item['question_id'] = question['question_id']
122 |         item['question'] = torch.LongTensor(question['question_wids'])
123 |         item['lengths'] = torch.LongTensor([len(question['question_wids'])])
124 |         item['image_name'] = question['image_name']
125 | 
126 |         # Process Object, Attribut and Relational features
127 |         if self.dir_rcnn:
128 |             item = self.add_rcnn_to_item(item)
129 |         elif self.dir_cnn:
130 |             item = self.add_cnn_to_item(item)
131 |         elif self.dir_vgg16:
132 |             item = self.add_vgg_to_item(item)
133 | 
134 |         # Process Answer if exists
135 |         if 'annotations' in self.dataset:
136 |             annotation = self.dataset['annotations'][index]
137 |             if self.load_original_annotation:
138 |                 item['original_annotation'] = annotation
139 |             if 'train' in self.split and self.samplingans:
140 |                 proba = annotation['answers_count']
141 |                 proba = proba / np.sum(proba)
142 |                 item['answer_id'] = int(np.random.choice(annotation['answers_id'], p=proba))
143 |             else:
144 |                 item['answer_id'] = annotation['answer_id']
145 |             item['class_id'] = torch.LongTensor([item['answer_id']])
146 |             item['answer'] = annotation['answer']
147 |             item['question_type'] = annotation['question_type']
148 | 
149 |         return item
150 | 
151 |     def download(self):
152 |         dir_ann = osp.join(self.dir_raw, 'annotations')
153 |         os.system('mkdir -p '+dir_ann)
154 |         os.system('wget https://computing.ece.vt.edu/~aish/vqacp/vqacp_v2_train_questions.json -P' + dir_ann)
155 |         os.system('wget https://computing.ece.vt.edu/~aish/vqacp/vqacp_v2_test_questions.json -P' + dir_ann)
156 |         os.system('wget https://computing.ece.vt.edu/~aish/vqacp/vqacp_v2_train_annotations.json -P' + dir_ann)
157 |         os.system('wget https://computing.ece.vt.edu/~aish/vqacp/vqacp_v2_test_annotations.json -P' + dir_ann)
158 |         train_q = {"questions":json.load(open(osp.join(dir_ann, "vqacp_v2_train_questions.json")))}
159 |         val_q = {"questions":json.load(open(osp.join(dir_ann, "vqacp_v2_test_questions.json")))}
160 |         train_ann = {"annotations":json.load(open(osp.join(dir_ann, "vqacp_v2_train_annotations.json")))}
161 |         val_ann = {"annotations":json.load(open(osp.join(dir_ann, "vqacp_v2_test_annotations.json")))}
162 |         train_q['info'] = {}
163 |         train_q['data_type'] = 'mscoco'
164 |         train_q['data_subtype'] = "train2014cp"
165 |         train_q['task_type'] = "Open-Ended"
166 |         train_q['license'] = {}
167 |         val_q['info'] = {}
168 |         val_q['data_type'] = 'mscoco'
169 |         val_q['data_subtype'] = "val2014cp"
170 |         val_q['task_type'] = "Open-Ended"
171 |         val_q['license'] = {}
172 |         for k in ["info", 'data_type','data_subtype', 'license']:
173 |             train_ann[k] = train_q[k]
174 |             val_ann[k] = val_q[k]
175 |         with open(osp.join(dir_ann, "OpenEnded_mscoco_train2014_questions.json"), 'w') as F:
176 |             F.write(json.dumps(train_q))
177 |         with open(osp.join(dir_ann, "OpenEnded_mscoco_val2014_questions.json"), 'w') as F:
178 |             F.write(json.dumps(val_q))
179 |         with open(osp.join(dir_ann, "mscoco_train2014_annotations.json"), 'w') as F:
180 |             F.write(json.dumps(train_ann))
181 |         with open(osp.join(dir_ann, "mscoco_val2014_annotations.json"), 'w') as F:
182 |             F.write(json.dumps(val_ann))
183 | 
184 |     def add_image_names(self, dataset):
185 |         for q in dataset['questions']:
186 |             q['image_name'] = 'COCO_%s_%012d.jpg'%(q['coco_split'],q['image_id'])
187 |         return dataset
188 | 
189 | 


--------------------------------------------------------------------------------
/cfvqa/models/criterions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuleiniu/cfvqa/736795bef50db9c76818f9a08202c7f325489afd/cfvqa/models/criterions/__init__.py


--------------------------------------------------------------------------------
/cfvqa/models/criterions/cfvqa_criterion.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch
 3 | import torch.nn.functional as F
 4 | from bootstrap.lib.logger import Logger
 5 | from bootstrap.lib.options import Options
 6 | 
 7 | class CFVQACriterion(nn.Module):
 8 | 
 9 |     def __init__(self, question_loss_weight=1.0, vision_loss_weight=1.0, is_va=True):
10 |         super().__init__()
11 |         self.is_va = is_va
12 | 
13 |         Logger()(f'CFVQACriterion, with question_loss_weight = ({question_loss_weight})')
14 |         if self.is_va:
15 |             Logger()(f'CFVQACriterion, with vision_loss_weight = ({vision_loss_weight})')
16 | 
17 |         self.fusion_loss = nn.CrossEntropyLoss()
18 |         self.question_loss = nn.CrossEntropyLoss()
19 |         self.question_loss_weight = question_loss_weight
20 |         if self.is_va:
21 |             self.vision_loss = nn.CrossEntropyLoss()
22 |             self.vision_loss_weight = vision_loss_weight
23 |         
24 |     def forward(self, net_out, batch):
25 |         out = {}
26 |         class_id = batch['class_id'].squeeze(1)
27 |         
28 |         logits_rubi = net_out['logits_all']
29 |         fusion_loss = self.fusion_loss(logits_rubi, class_id)
30 |         
31 |         logits_q = net_out['logits_q']
32 |         question_loss = self.question_loss(logits_q, class_id)
33 | 
34 |         if self.is_va:
35 |             logits_v = net_out['logits_v']
36 |             vision_loss = self.vision_loss(logits_v, class_id)
37 | 
38 |         nde = net_out['z_nde']
39 |         p_te = torch.nn.functional.softmax(logits_rubi, -1).clone().detach()
40 |         p_nde = torch.nn.functional.softmax(nde, -1)
41 |         kl_loss = - p_te*p_nde.log()    
42 |         kl_loss = kl_loss.sum(1).mean() 
43 | 
44 |         loss = fusion_loss \
45 |                 + self.question_loss_weight * question_loss \
46 |                 + kl_loss
47 |         if self.is_va:
48 |             loss += self.vision_loss_weight * vision_loss
49 | 
50 |         out['loss'] = loss
51 |         out['loss_mm_q'] = fusion_loss
52 |         out['loss_q'] = question_loss
53 |         if self.is_va:
54 |             out['loss_v'] = vision_loss
55 |         return out
56 | 


--------------------------------------------------------------------------------
/cfvqa/models/criterions/factory.py:
--------------------------------------------------------------------------------
 1 | from bootstrap.lib.options import Options
 2 | from block.models.criterions.vqa_cross_entropy import VQACrossEntropyLoss
 3 | from .rubi_criterion import RUBiCriterion
 4 | from .cfvqa_criterion import CFVQACriterion
 5 | 
 6 | def factory(engine, mode):
 7 |     name = Options()['model.criterion.name']
 8 |     split = engine.dataset[mode].split
 9 |     eval_only = 'train' not in engine.dataset
10 |     
11 |     opt = Options()['model.criterion']
12 |     if split == "test" and 'tdiuc' not in Options()['dataset.name']:
13 |         return None
14 |     if name == 'vqa_cross_entropy':
15 |         criterion = VQACrossEntropyLoss()
16 |     elif name == "rubi_criterion":
17 |         criterion = RUBiCriterion(
18 |             question_loss_weight=opt['question_loss_weight']
19 |         )
20 |     elif name == "cfvqa_criterion":
21 |         criterion = CFVQACriterion(
22 |             question_loss_weight=opt['question_loss_weight'],
23 |             vision_loss_weight=opt['vision_loss_weight'],
24 |             is_va=True
25 |         )
26 |     elif name == "cfvqasimple_criterion":
27 |         criterion = CFVQACriterion(
28 |             question_loss_weight=opt['question_loss_weight'],
29 |             is_va=False
30 |         )
31 |     else:
32 |         raise ValueError(name)
33 |     return criterion
34 | 


--------------------------------------------------------------------------------
/cfvqa/models/criterions/rubi_criterion.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch
 3 | import torch.nn.functional as F
 4 | from bootstrap.lib.logger import Logger
 5 | from bootstrap.lib.options import Options
 6 | 
 7 | class RUBiCriterion(nn.Module):
 8 | 
 9 |     def __init__(self, question_loss_weight=1.0):
10 |         super().__init__()
11 | 
12 |         Logger()(f'RUBiCriterion, with question_loss_weight = ({question_loss_weight})')
13 | 
14 |         self.question_loss_weight = question_loss_weight
15 |         self.fusion_loss = nn.CrossEntropyLoss()
16 |         self.question_loss = nn.CrossEntropyLoss()
17 |      
18 |     def forward(self, net_out, batch):
19 |         out = {}
20 |         # logits = net_out['logits']
21 |         logits_q = net_out['logits_q']
22 |         logits_rubi = net_out['logits_all']
23 |         class_id = batch['class_id'].squeeze(1)
24 |         fusion_loss = self.fusion_loss(logits_rubi, class_id)
25 |         question_loss = self.question_loss(logits_q, class_id)
26 |         loss = fusion_loss + self.question_loss_weight * question_loss
27 | 
28 |         out['loss'] = loss
29 |         out['loss_mm_q'] = fusion_loss
30 |         out['loss_q'] = question_loss
31 |         return out
32 | 


--------------------------------------------------------------------------------
/cfvqa/models/metrics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuleiniu/cfvqa/736795bef50db9c76818f9a08202c7f325489afd/cfvqa/models/metrics/__init__.py


--------------------------------------------------------------------------------
/cfvqa/models/metrics/factory.py:
--------------------------------------------------------------------------------
 1 | from bootstrap.lib.options import Options
 2 | from block.models.metrics.vqa_accuracies import VQAAccuracies
 3 | from .vqa_rubi_metrics import VQARUBiMetrics
 4 | from .vqa_cfvqa_metrics import VQACFVQAMetrics
 5 | from .vqa_cfvqasimple_metrics import VQACFVQASimpleMetrics
 6 | 
 7 | def factory(engine, mode):
 8 |     name = Options()['model.metric.name']
 9 |     metric = None
10 | 
11 |     if name == 'vqa_accuracies':
12 |         open_ended = ('tdiuc' not in Options()['dataset.name'] and 'gqa' not in Options()['dataset.name'])
13 |         if mode == 'train':
14 |             split = engine.dataset['train'].split
15 |             if split == 'train':
16 |                 metric = VQAAccuracies(engine,
17 |                     mode='train',
18 |                     open_ended=open_ended,
19 |                     tdiuc=True,
20 |                     dir_exp=Options()['exp.dir'],
21 |                     dir_vqa=Options()['dataset.dir'])
22 |             elif split == 'trainval':
23 |                 metric = None
24 |             else:
25 |                 raise ValueError(split)
26 |         elif mode == 'eval':
27 |             metric = VQAAccuracies(engine,
28 |                 mode='eval',
29 |                 open_ended=open_ended,
30 |                 tdiuc=('tdiuc' in Options()['dataset.name'] or Options()['dataset.eval_split'] != 'test'),
31 |                 dir_exp=Options()['exp.dir'],
32 |                 dir_vqa=Options()['dataset.dir'])
33 |         else:
34 |             metric = None
35 | 
36 |     elif name == "vqa_rubi_metrics":
37 |         open_ended = ('tdiuc' not in Options()['dataset.name'] and 'gqa' not in Options()['dataset.name'])
38 |         metric = VQARUBiMetrics(engine,
39 |             mode=mode,
40 |             open_ended=open_ended,
41 |             tdiuc=True,
42 |             dir_exp=Options()['exp.dir'],
43 |             dir_vqa=Options()['dataset.dir']
44 |         )
45 | 
46 |     elif name == "vqa_cfvqa_metrics":
47 |         open_ended = ('tdiuc' not in Options()['dataset.name'] and 'gqa' not in Options()['dataset.name'])
48 |         metric = VQACFVQAMetrics(engine,
49 |             mode=mode,
50 |             open_ended=open_ended,
51 |             tdiuc=True,
52 |             dir_exp=Options()['exp.dir'],
53 |             dir_vqa=Options()['dataset.dir'],
54 |         )
55 | 
56 |     elif name == "vqa_cfvqasimple_metrics":
57 |         open_ended = ('tdiuc' not in Options()['dataset.name'] and 'gqa' not in Options()['dataset.name'])
58 |         metric = VQACFVQASimpleMetrics(engine,
59 |             mode=mode,
60 |             open_ended=open_ended,
61 |             tdiuc=True,
62 |             dir_exp=Options()['exp.dir'],
63 |             dir_vqa=Options()['dataset.dir'],
64 |         )
65 | 
66 |     else:
67 |         raise ValueError(name)
68 |     return metric
69 | 


--------------------------------------------------------------------------------
/cfvqa/models/metrics/vqa_cfvqa_metrics.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import os
  4 | import json
  5 | from scipy import stats
  6 | import numpy as np
  7 | from collections import defaultdict
  8 | 
  9 | from bootstrap.models.metrics.accuracy import accuracy
 10 | from block.models.metrics.vqa_accuracies import VQAAccuracies
 11 | from bootstrap.lib.logger import Logger
 12 | from bootstrap.lib.options import Options
 13 | from bootstrap.lib.logger import Logger
 14 | 
 15 | class VQAAccuracy(nn.Module):
 16 | 
 17 |     def __init__(self, topk=[1,5]):
 18 |         super().__init__()
 19 |         self.topk = topk
 20 |         self.metric_list = ['_all', '_vq', '_cfvqa', '_q', '_v']
 21 | 
 22 |     def forward(self, cri_out, net_out, batch):
 23 |         out = {}
 24 |         class_id = batch['class_id'].data.cpu()
 25 |         for key in self.metric_list:
 26 |             logits = net_out[f'logits{key}'].data.cpu()
 27 |             acc_out = accuracy(logits, class_id, topk=self.topk)
 28 |             for i, k in enumerate(self.topk):
 29 |                 out[f'accuracy{key}_top{k}'] = acc_out[i]
 30 |         return out
 31 | 
 32 | 
 33 | class VQACFVQAMetrics(VQAAccuracies):
 34 | 
 35 |     def __init__(self, *args, **kwargs):
 36 |         super().__init__(*args, **kwargs)
 37 |         self.metric_list = ['_all', '_vq', '_cfvqa', '_q', '_v']
 38 |         if Options()['dataset.eval_split'] == 'test': # 0430
 39 |             self.accuracy = None
 40 |         else:
 41 |             self.accuracy = VQAAccuracy()
 42 |         self.rm_dir_rslt = 1 if Options()['dataset.train_split'] is not None else 0
 43 | 
 44 |     def forward(self, cri_out, net_out, batch):
 45 |         out = {}
 46 |         if self.accuracy is not None:
 47 |             out = self.accuracy(cri_out, net_out, batch)
 48 | 
 49 |         # add answers and answer_ids keys to net_out
 50 |         net_out = self.engine.model.network.process_answers(net_out)
 51 | 
 52 |         batch_size = len(batch['index'])
 53 |         for i in range(batch_size):
 54 |             
 55 |             # Open Ended Accuracy (VQA-VQA2)
 56 |             if self.open_ended:
 57 |                 for key in self.metric_list:
 58 |                     pred_item = {
 59 |                         'question_id': batch['question_id'][i],
 60 |                         'answer': net_out[f'answers{key}'][i]
 61 |                     }
 62 |                     self.results[key].append(pred_item)
 63 | 
 64 |                 # if self.dataset.split == 'test': # 0430
 65 |                 #     pred_item = {
 66 |                 #         'question_id': batch['question_id'][i],
 67 |                 #         'answer': net_out[f'answers{key}'][i]
 68 |                 #         # 'answer': net_out[f'answers'][i]
 69 |                 #     }
 70 |                 #     # if 'is_testdev' in batch and batch['is_testdev'][i]: # 0430
 71 |                 #     #     self.results_testdev.append(pred_item)
 72 | 
 73 |                 #     if self.logits['tensor'] is None:
 74 |                 #         self.logits['tensor'] = torch.FloatTensor(len(self.dataset), logits.size(1))
 75 | 
 76 |                 #     self.logits['tensor'][self.idx] = logits[i]
 77 |                 #     self.logits['qid_to_idx'][batch['question_id'][i]] = self.idx
 78 |                     
 79 |                 #     self.idx += 1
 80 | 
 81 |                 # TDIUC metrics
 82 |                 if self.tdiuc:
 83 |                     gt_aid = batch['answer_id'][i]
 84 |                     gt_ans = batch['answer'][i]
 85 |                     gt_type = batch['question_type'][i]
 86 |                     self.gt_types.append(gt_type)
 87 |                     if gt_ans in self.ans_to_aid:
 88 |                         self.gt_aids.append(gt_aid)
 89 |                     else:
 90 |                         self.gt_aids.append(-1)
 91 |                         self.gt_aid_not_found += 1
 92 | 
 93 |                     for key in self.metric_list:
 94 |                         qid = batch['question_id'][i]
 95 |                         pred_aid = net_out[f'answer_ids{key}'][i]
 96 |                         self.pred_aids[key].append(pred_aid)
 97 | 
 98 |                         self.res_by_type[key][gt_type+'_pred'].append(pred_aid)
 99 | 
100 |                         if gt_ans in self.ans_to_aid:
101 |                             self.res_by_type[key][gt_type+'_gt'].append(gt_aid)
102 |                             if gt_aid == pred_aid:
103 |                                 self.res_by_type[key][gt_type+'_t'].append(pred_aid)
104 |                             else:
105 |                                 self.res_by_type[key][gt_type+'_f'].append(pred_aid)
106 |                         else:
107 |                             self.res_by_type[key][gt_type+'_gt'].append(-1)
108 |                             self.res_by_type[key][gt_type+'_f'].append(pred_aid)
109 |         return out
110 | 
111 |     def reset_oe(self):
112 |         self.results = dict()
113 |         self.dir_rslt = dict()
114 |         self.path_rslt = dict()
115 |         for key in self.metric_list:
116 |             self.results[key] = []
117 |             self.dir_rslt[key] = os.path.join(
118 |                 self.dir_exp,
119 |                 f'results{key}',
120 |                 self.dataset.split,
121 |                 'epoch,{}'.format(self.engine.epoch))
122 |             os.system('mkdir -p '+self.dir_rslt[key])
123 |             self.path_rslt[key] = os.path.join(
124 |                 self.dir_rslt[key],
125 |                 'OpenEnded_mscoco_{}_model_results.json'.format(
126 |                     self.dataset.get_subtype()))
127 | 
128 |             if self.dataset.split == 'test':
129 |                 pass
130 |                 # self.results_testdev = []
131 |                 # self.path_rslt_testdev = os.path.join(
132 |                 #     self.dir_rslt,
133 |                 #     'OpenEnded_mscoco_{}_model_results.json'.format(
134 |                 #         self.dataset.get_subtype(testdev=True)))
135 | 
136 |                 # self.path_logits = os.path.join(self.dir_rslt, 'logits.pth')
137 |                 # os.system('mkdir -p '+os.path.dirname(self.path_logits))
138 | 
139 |                 # self.logits = {}
140 |                 # self.logits['aid_to_ans'] = self.engine.model.network.aid_to_ans
141 |                 # self.logits['qid_to_idx'] = {}
142 |                 # self.logits['tensor'] = None
143 | 
144 |                 # self.idx = 0
145 | 
146 |                 # path_aid_to_ans = os.path.join(self.dir_rslt, 'aid_to_ans.json')
147 |                 # with open(path_aid_to_ans, 'w') as f:
148 |                 #     json.dump(self.engine.model.network.aid_to_ans, f)
149 |     
150 | 
151 |     def reset_tdiuc(self):
152 |         self.pred_aids = defaultdict(list)
153 |         self.gt_aids = []
154 |         self.gt_types = []
155 |         self.gt_aid_not_found = 0
156 |         self.res_by_type = {key: defaultdict(list) for key in self.metric_list}
157 |     
158 |     
159 |     def compute_oe_accuracy(self):
160 |         logs_name_prefix = Options()['misc'].get('logs_name', '') or ''
161 |         
162 |         for key in self.metric_list:
163 |             logs_name = (logs_name_prefix + key) or "logs"
164 |             with open(self.path_rslt[key], 'w') as f:
165 |                 json.dump(self.results[key], f)
166 |             
167 |             # if self.dataset.split == 'test':
168 |             #     with open(self.path_rslt_testdev, 'w') as f:
169 |             #         json.dump(self.results_testdev, f)
170 | 
171 |             if 'test' not in self.dataset.split:
172 |                 call_to_prog = 'python -m block.models.metrics.compute_oe_accuracy '\
173 |                     + '--dir_vqa {} --dir_exp {} --dir_rslt {} --epoch {} --split {} --logs_name {} --rm {} &'\
174 |                     .format(self.dir_vqa, self.dir_exp, self.dir_rslt[key], self.engine.epoch, self.dataset.split, logs_name, self.rm_dir_rslt)
175 |                 Logger()('`'+call_to_prog+'`')
176 |                 os.system(call_to_prog)
177 | 
178 | 
179 |     def compute_tdiuc_metrics(self):
180 |         Logger()('{} of validation answers were not found in ans_to_aid'.format(self.gt_aid_not_found))
181 |         
182 |         for key in self.metric_list:
183 |             Logger()(f'Computing TDIUC metrics for logits{key}')
184 |             accuracy = float(100*np.mean(np.array(self.pred_aids[key])==np.array(self.gt_aids)))
185 |             Logger()('Overall Traditional Accuracy is {:.2f}'.format(accuracy))
186 |             Logger().log_value('{}_epoch.tdiuc.accuracy{}'.format(self.mode, key), accuracy, should_print=False)
187 |             
188 |             types = list(set(self.gt_types))
189 |             sum_acc = []
190 |             eps = 1e-10
191 | 
192 |             Logger()('---------------------------------------')
193 |             Logger()('Not using per-answer normalization...')
194 |             for tp in types:
195 |                 acc = 100*(len(self.res_by_type[key][tp+'_t'])/len(self.res_by_type[key][tp+'_t']+self.res_by_type[key][tp+'_f']))
196 |                 sum_acc.append(acc+eps)
197 |                 Logger()(f"Accuracy {key} for class '{tp}' is {acc:.2f}")
198 |                 Logger().log_value('{}_epoch.tdiuc{}.perQuestionType.{}'.format(self.mode, key, tp), acc, should_print=False)
199 | 
200 |             acc_mpt_a = float(np.mean(np.array(sum_acc)))
201 |             Logger()('Arithmetic MPT Accuracy {} is {:.2f}'.format(key, acc_mpt_a))
202 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_a'.format(self.mode, key), acc_mpt_a, should_print=False)
203 | 
204 |             acc_mpt_h = float(stats.hmean(sum_acc))
205 |             Logger()('Harmonic MPT Accuracy {} is {:.2f}'.format(key, acc_mpt_h))
206 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_h'.format(self.mode, key), acc_mpt_h, should_print=False)
207 |             
208 |             Logger()('---------------------------------------')
209 |             Logger()('Using per-answer normalization...')
210 |             for tp in types:
211 |                 per_ans_stat = defaultdict(int)
212 |                 for g,p in zip(self.res_by_type[key][tp+'_gt'],self.res_by_type[key][tp+'_pred']):
213 |                     per_ans_stat[str(g)+'_gt']+=1
214 |                     if g==p:
215 |                         per_ans_stat[str(g)]+=1
216 |                 unq_acc = 0
217 |                 for unq_ans in set(self.res_by_type[key][tp+'_gt']):
218 |                     acc_curr_ans = per_ans_stat[str(unq_ans)]/per_ans_stat[str(unq_ans)+'_gt']
219 |                     unq_acc +=acc_curr_ans
220 |                 acc = 100*unq_acc/len(set(self.res_by_type[key][tp+'_gt']))
221 |                 sum_acc.append(acc+eps)
222 |                 Logger()("Accuracy {} for class '{}' is {:.2f}".format(key, tp, acc))
223 |                 Logger().log_value('{}_epoch.tdiuc{}.perQuestionType_norm.{}'.format(self.mode, key, tp), acc, should_print=False)
224 | 
225 |             acc_mpt_a = float(np.mean(np.array(sum_acc)))
226 |             Logger()('Arithmetic MPT Accuracy is {:.2f}'.format(acc_mpt_a))
227 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_a_norm'.format(self.mode, key), acc_mpt_a, should_print=False)
228 | 
229 |             acc_mpt_h = float(stats.hmean(sum_acc))
230 |             Logger()('Harmonic MPT Accuracy is {:.2f}'.format(acc_mpt_h))
231 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_h_norm'.format(self.mode, key), acc_mpt_h, should_print=False)
232 | 


--------------------------------------------------------------------------------
/cfvqa/models/metrics/vqa_cfvqasimple_metrics.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import os
  4 | import json
  5 | from scipy import stats
  6 | import numpy as np
  7 | from collections import defaultdict
  8 | 
  9 | from bootstrap.models.metrics.accuracy import accuracy
 10 | from block.models.metrics.vqa_accuracies import VQAAccuracies
 11 | from bootstrap.lib.logger import Logger
 12 | from bootstrap.lib.options import Options
 13 | from bootstrap.lib.logger import Logger
 14 | 
 15 | class VQAAccuracy(nn.Module):
 16 | 
 17 |     def __init__(self, topk=[1,5]):
 18 |         super().__init__()
 19 |         self.topk = topk
 20 |         self.metric_list = ['_all', '_vq', '_cfvqa', '_q']
 21 | 
 22 |     def forward(self, cri_out, net_out, batch):
 23 |         out = {}
 24 |         class_id = batch['class_id'].data.cpu()
 25 |         for key in self.metric_list:
 26 |             logits = net_out[f'logits{key}'].data.cpu()
 27 |             acc_out = accuracy(logits, class_id, topk=self.topk)
 28 |             for i, k in enumerate(self.topk):
 29 |                 out[f'accuracy{key}_top{k}'] = acc_out[i]
 30 |         return out
 31 | 
 32 | 
 33 | class VQACFVQASimpleMetrics(VQAAccuracies):
 34 | 
 35 |     def __init__(self, *args, **kwargs):
 36 |         super().__init__(*args, **kwargs)
 37 |         self.metric_list = ['_all', '_vq', '_cfvqa', '_q']
 38 |         if Options()['dataset.eval_split'] == 'test': # 0430
 39 |             self.accuracy = None
 40 |         else:
 41 |             self.accuracy = VQAAccuracy()
 42 |         self.rm_dir_rslt = 1 if Options()['dataset.train_split'] is not None else 0
 43 | 
 44 |     def forward(self, cri_out, net_out, batch):
 45 |         out = {}
 46 |         if self.accuracy is not None:
 47 |             out = self.accuracy(cri_out, net_out, batch)
 48 | 
 49 |         # add answers and answer_ids keys to net_out
 50 |         net_out = self.engine.model.network.process_answers(net_out)
 51 | 
 52 |         batch_size = len(batch['index'])
 53 |         for i in range(batch_size):
 54 |             
 55 |             # Open Ended Accuracy (VQA-VQA2)
 56 |             if self.open_ended:
 57 |                 for key in self.metric_list:
 58 |                     pred_item = {
 59 |                         'question_id': batch['question_id'][i],
 60 |                         'answer': net_out[f'answers{key}'][i]
 61 |                     }
 62 |                     self.results[key].append(pred_item)
 63 | 
 64 |                 # if self.dataset.split == 'test': # 0430
 65 |                 #     pred_item = {
 66 |                 #         'question_id': batch['question_id'][i],
 67 |                 #         'answer': net_out[f'answers{key}'][i]
 68 |                 #         # 'answer': net_out[f'answers'][i]
 69 |                 #     }
 70 |                 #     # if 'is_testdev' in batch and batch['is_testdev'][i]: # 0430
 71 |                 #     #     self.results_testdev.append(pred_item)
 72 | 
 73 |                 #     if self.logits['tensor'] is None:
 74 |                 #         self.logits['tensor'] = torch.FloatTensor(len(self.dataset), logits.size(1))
 75 | 
 76 |                 #     self.logits['tensor'][self.idx] = logits[i]
 77 |                 #     self.logits['qid_to_idx'][batch['question_id'][i]] = self.idx
 78 |                     
 79 |                 #     self.idx += 1
 80 | 
 81 |                 # TDIUC metrics
 82 |                 if self.tdiuc:
 83 |                     gt_aid = batch['answer_id'][i]
 84 |                     gt_ans = batch['answer'][i]
 85 |                     gt_type = batch['question_type'][i]
 86 |                     self.gt_types.append(gt_type)
 87 |                     if gt_ans in self.ans_to_aid:
 88 |                         self.gt_aids.append(gt_aid)
 89 |                     else:
 90 |                         self.gt_aids.append(-1)
 91 |                         self.gt_aid_not_found += 1
 92 | 
 93 |                     for key in self.metric_list:
 94 |                         qid = batch['question_id'][i]
 95 |                         pred_aid = net_out[f'answer_ids{key}'][i]
 96 |                         self.pred_aids[key].append(pred_aid)
 97 | 
 98 |                         self.res_by_type[key][gt_type+'_pred'].append(pred_aid)
 99 | 
100 |                         if gt_ans in self.ans_to_aid:
101 |                             self.res_by_type[key][gt_type+'_gt'].append(gt_aid)
102 |                             if gt_aid == pred_aid:
103 |                                 self.res_by_type[key][gt_type+'_t'].append(pred_aid)
104 |                             else:
105 |                                 self.res_by_type[key][gt_type+'_f'].append(pred_aid)
106 |                         else:
107 |                             self.res_by_type[key][gt_type+'_gt'].append(-1)
108 |                             self.res_by_type[key][gt_type+'_f'].append(pred_aid)
109 |         return out
110 | 
111 |     def reset_oe(self):
112 |         self.results = dict()
113 |         self.dir_rslt = dict()
114 |         self.path_rslt = dict()
115 |         for key in self.metric_list:
116 |             self.results[key] = []
117 |             self.dir_rslt[key] = os.path.join(
118 |                 self.dir_exp,
119 |                 f'results{key}',
120 |                 self.dataset.split,
121 |                 'epoch,{}'.format(self.engine.epoch))
122 |             os.system('mkdir -p '+self.dir_rslt[key])
123 |             self.path_rslt[key] = os.path.join(
124 |                 self.dir_rslt[key],
125 |                 'OpenEnded_mscoco_{}_model_results.json'.format(
126 |                     self.dataset.get_subtype()))
127 | 
128 |             if self.dataset.split == 'test':
129 |                 pass
130 |                 # self.results_testdev = []
131 |                 # self.path_rslt_testdev = os.path.join(
132 |                 #     self.dir_rslt,
133 |                 #     'OpenEnded_mscoco_{}_model_results.json'.format(
134 |                 #         self.dataset.get_subtype(testdev=True)))
135 | 
136 |                 # self.path_logits = os.path.join(self.dir_rslt, 'logits.pth')
137 |                 # os.system('mkdir -p '+os.path.dirname(self.path_logits))
138 | 
139 |                 # self.logits = {}
140 |                 # self.logits['aid_to_ans'] = self.engine.model.network.aid_to_ans
141 |                 # self.logits['qid_to_idx'] = {}
142 |                 # self.logits['tensor'] = None
143 | 
144 |                 # self.idx = 0
145 | 
146 |                 # path_aid_to_ans = os.path.join(self.dir_rslt, 'aid_to_ans.json')
147 |                 # with open(path_aid_to_ans, 'w') as f:
148 |                 #     json.dump(self.engine.model.network.aid_to_ans, f)
149 |     
150 | 
151 |     def reset_tdiuc(self):
152 |         self.pred_aids = defaultdict(list)
153 |         self.gt_aids = []
154 |         self.gt_types = []
155 |         self.gt_aid_not_found = 0
156 |         self.res_by_type = {key: defaultdict(list) for key in self.metric_list}
157 |     
158 |     
159 |     def compute_oe_accuracy(self):
160 |         logs_name_prefix = Options()['misc'].get('logs_name', '') or ''
161 |         
162 |         for key in self.metric_list:
163 |             logs_name = (logs_name_prefix + key) or "logs"
164 |             with open(self.path_rslt[key], 'w') as f:
165 |                 json.dump(self.results[key], f)
166 |             
167 |             # if self.dataset.split == 'test':
168 |             #     with open(self.path_rslt_testdev, 'w') as f:
169 |             #         json.dump(self.results_testdev, f)
170 | 
171 |             if 'test' not in self.dataset.split:
172 |                 call_to_prog = 'python -m block.models.metrics.compute_oe_accuracy '\
173 |                     + '--dir_vqa {} --dir_exp {} --dir_rslt {} --epoch {} --split {} --logs_name {} --rm {} &'\
174 |                     .format(self.dir_vqa, self.dir_exp, self.dir_rslt[key], self.engine.epoch, self.dataset.split, logs_name, self.rm_dir_rslt)
175 |                 Logger()('`'+call_to_prog+'`')
176 |                 os.system(call_to_prog)
177 | 
178 | 
179 |     def compute_tdiuc_metrics(self):
180 |         Logger()('{} of validation answers were not found in ans_to_aid'.format(self.gt_aid_not_found))
181 |         
182 |         for key in self.metric_list:
183 |             Logger()(f'Computing TDIUC metrics for logits{key}')
184 |             accuracy = float(100*np.mean(np.array(self.pred_aids[key])==np.array(self.gt_aids)))
185 |             Logger()('Overall Traditional Accuracy is {:.2f}'.format(accuracy))
186 |             Logger().log_value('{}_epoch.tdiuc.accuracy{}'.format(self.mode, key), accuracy, should_print=False)
187 |             
188 |             types = list(set(self.gt_types))
189 |             sum_acc = []
190 |             eps = 1e-10
191 | 
192 |             Logger()('---------------------------------------')
193 |             Logger()('Not using per-answer normalization...')
194 |             for tp in types:
195 |                 acc = 100*(len(self.res_by_type[key][tp+'_t'])/len(self.res_by_type[key][tp+'_t']+self.res_by_type[key][tp+'_f']))
196 |                 sum_acc.append(acc+eps)
197 |                 Logger()(f"Accuracy {key} for class '{tp}' is {acc:.2f}")
198 |                 Logger().log_value('{}_epoch.tdiuc{}.perQuestionType.{}'.format(self.mode, key, tp), acc, should_print=False)
199 | 
200 |             acc_mpt_a = float(np.mean(np.array(sum_acc)))
201 |             Logger()('Arithmetic MPT Accuracy {} is {:.2f}'.format(key, acc_mpt_a))
202 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_a'.format(self.mode, key), acc_mpt_a, should_print=False)
203 | 
204 |             acc_mpt_h = float(stats.hmean(sum_acc))
205 |             Logger()('Harmonic MPT Accuracy {} is {:.2f}'.format(key, acc_mpt_h))
206 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_h'.format(self.mode, key), acc_mpt_h, should_print=False)
207 |             
208 |             Logger()('---------------------------------------')
209 |             Logger()('Using per-answer normalization...')
210 |             for tp in types:
211 |                 per_ans_stat = defaultdict(int)
212 |                 for g,p in zip(self.res_by_type[key][tp+'_gt'],self.res_by_type[key][tp+'_pred']):
213 |                     per_ans_stat[str(g)+'_gt']+=1
214 |                     if g==p:
215 |                         per_ans_stat[str(g)]+=1
216 |                 unq_acc = 0
217 |                 for unq_ans in set(self.res_by_type[key][tp+'_gt']):
218 |                     acc_curr_ans = per_ans_stat[str(unq_ans)]/per_ans_stat[str(unq_ans)+'_gt']
219 |                     unq_acc +=acc_curr_ans
220 |                 acc = 100*unq_acc/len(set(self.res_by_type[key][tp+'_gt']))
221 |                 sum_acc.append(acc+eps)
222 |                 Logger()("Accuracy {} for class '{}' is {:.2f}".format(key, tp, acc))
223 |                 Logger().log_value('{}_epoch.tdiuc{}.perQuestionType_norm.{}'.format(self.mode, key, tp), acc, should_print=False)
224 | 
225 |             acc_mpt_a = float(np.mean(np.array(sum_acc)))
226 |             Logger()('Arithmetic MPT Accuracy is {:.2f}'.format(acc_mpt_a))
227 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_a_norm'.format(self.mode, key), acc_mpt_a, should_print=False)
228 | 
229 |             acc_mpt_h = float(stats.hmean(sum_acc))
230 |             Logger()('Harmonic MPT Accuracy is {:.2f}'.format(acc_mpt_h))
231 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_h_norm'.format(self.mode, key), acc_mpt_h, should_print=False)
232 | 


--------------------------------------------------------------------------------
/cfvqa/models/metrics/vqa_rubi_metrics.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import os
  4 | import json
  5 | from scipy import stats
  6 | import numpy as np
  7 | from collections import defaultdict
  8 | 
  9 | from bootstrap.models.metrics.accuracy import accuracy
 10 | from block.models.metrics.vqa_accuracies import VQAAccuracies
 11 | from bootstrap.lib.logger import Logger
 12 | from bootstrap.lib.options import Options
 13 | from bootstrap.lib.logger import Logger
 14 | 
 15 | class VQAAccuracy(nn.Module):
 16 | 
 17 |     def __init__(self, topk=[1,5]):
 18 |         super().__init__()
 19 |         self.topk = topk
 20 | 
 21 |     def forward(self, cri_out, net_out, batch):
 22 |         out = {}
 23 |         class_id = batch['class_id'].data.cpu()
 24 |         for key in ['', '_all', '_q']:
 25 |             logits = net_out[f'logits{key}'].data.cpu()
 26 |             acc_out = accuracy(logits, class_id, topk=self.topk)
 27 |             for i, k in enumerate(self.topk):
 28 |                 out[f'accuracy{key}_top{k}'] = acc_out[i]
 29 |         return out
 30 | 
 31 | 
 32 | class VQARUBiMetrics(VQAAccuracies):
 33 | 
 34 |     def __init__(self, *args, **kwargs):
 35 |         super().__init__(*args, **kwargs)
 36 |         self.accuracy = VQAAccuracy()
 37 |         self.rm_dir_rslt = 1 if Options()['dataset.train_split'] is not None else 0
 38 | 
 39 |     def forward(self, cri_out, net_out, batch):
 40 |         out = {}
 41 |         if self.accuracy is not None:
 42 |             out = self.accuracy(cri_out, net_out, batch)
 43 | 
 44 |         # add answers and answer_ids keys to net_out
 45 |         net_out = self.engine.model.network.process_answers(net_out)
 46 | 
 47 |         batch_size = len(batch['index'])
 48 |         for i in range(batch_size):
 49 |             
 50 |             # Open Ended Accuracy (VQA-VQA2)
 51 |             if self.open_ended:
 52 |                 for key in ['', '_all', '_q']:
 53 |                     pred_item = {
 54 |                         'question_id': batch['question_id'][i],
 55 |                         'answer': net_out[f'answers{key}'][i]
 56 |                     }
 57 |                     self.results[key].append(pred_item)
 58 | 
 59 |                 if self.dataset.split == 'test':
 60 |                     pred_item = {
 61 |                         'question_id': batch['question_id'][i],
 62 |                         'answer': net_out[f'answers'][i]
 63 |                     }
 64 |                     if 'is_testdev' in batch and batch['is_testdev'][i]:
 65 |                         self.results_testdev.append(pred_item)
 66 | 
 67 |                     if self.logits['tensor'] is None:
 68 |                         self.logits['tensor'] = torch.FloatTensor(len(self.dataset), logits.size(1))
 69 | 
 70 |                     self.logits['tensor'][self.idx] = logits[i]
 71 |                     self.logits['qid_to_idx'][batch['question_id'][i]] = self.idx
 72 |                     
 73 |                     self.idx += 1
 74 | 
 75 |                 # TDIUC metrics
 76 |                 if self.tdiuc:
 77 |                     gt_aid = batch['answer_id'][i]
 78 |                     gt_ans = batch['answer'][i]
 79 |                     gt_type = batch['question_type'][i]
 80 |                     self.gt_types.append(gt_type)
 81 |                     if gt_ans in self.ans_to_aid:
 82 |                         self.gt_aids.append(gt_aid)
 83 |                     else:
 84 |                         self.gt_aids.append(-1)
 85 |                         self.gt_aid_not_found += 1
 86 | 
 87 |                     for key in ['', '_all', '_q']:
 88 |                         qid = batch['question_id'][i]
 89 |                         pred_aid = net_out[f'answer_ids{key}'][i]
 90 |                         self.pred_aids[key].append(pred_aid)
 91 | 
 92 |                         self.res_by_type[key][gt_type+'_pred'].append(pred_aid)
 93 | 
 94 |                         if gt_ans in self.ans_to_aid:
 95 |                             self.res_by_type[key][gt_type+'_gt'].append(gt_aid)
 96 |                             if gt_aid == pred_aid:
 97 |                                 self.res_by_type[key][gt_type+'_t'].append(pred_aid)
 98 |                             else:
 99 |                                 self.res_by_type[key][gt_type+'_f'].append(pred_aid)
100 |                         else:
101 |                             self.res_by_type[key][gt_type+'_gt'].append(-1)
102 |                             self.res_by_type[key][gt_type+'_f'].append(pred_aid)
103 |         return out
104 | 
105 |     def reset_oe(self):
106 |         self.results = dict()
107 |         self.dir_rslt = dict()
108 |         self.path_rslt = dict()
109 |         for key in ['', '_q', '_all']:
110 |             self.results[key] = []
111 |             self.dir_rslt[key] = os.path.join(
112 |                 self.dir_exp,
113 |                 f'results{key}',
114 |                 self.dataset.split,
115 |                 'epoch,{}'.format(self.engine.epoch))
116 |             os.system('mkdir -p '+self.dir_rslt[key])
117 |             self.path_rslt[key] = os.path.join(
118 |                 self.dir_rslt[key],
119 |                 'OpenEnded_mscoco_{}_model_results.json'.format(
120 |                     self.dataset.get_subtype()))
121 | 
122 |             if self.dataset.split == 'test':
123 |                 pass
124 |                 # self.results_testdev = []
125 |                 # self.path_rslt_testdev = os.path.join(
126 |                 #     self.dir_rslt,
127 |                 #     'OpenEnded_mscoco_{}_model_results.json'.format(
128 |                 #         self.dataset.get_subtype(testdev=True)))
129 | 
130 |                 # self.path_logits = os.path.join(self.dir_rslt, 'logits.pth')
131 |                 # os.system('mkdir -p '+os.path.dirname(self.path_logits))
132 | 
133 |                 # self.logits = {}
134 |                 # self.logits['aid_to_ans'] = self.engine.model.network.aid_to_ans
135 |                 # self.logits['qid_to_idx'] = {}
136 |                 # self.logits['tensor'] = None
137 | 
138 |                 # self.idx = 0
139 | 
140 |                 # path_aid_to_ans = os.path.join(self.dir_rslt, 'aid_to_ans.json')
141 |                 # with open(path_aid_to_ans, 'w') as f:
142 |                 #     json.dump(self.engine.model.network.aid_to_ans, f)
143 |     
144 | 
145 |     def reset_tdiuc(self):
146 |         self.pred_aids = defaultdict(list)
147 |         self.gt_aids = []
148 |         self.gt_types = []
149 |         self.gt_aid_not_found = 0
150 |         self.res_by_type = {key: defaultdict(list) for key in ['', '_all', '_q']}
151 |     
152 |     
153 |     def compute_oe_accuracy(self):
154 |         logs_name_prefix = Options()['misc'].get('logs_name', '') or ''
155 |         
156 |         for key in ['', '_all', '_q']:
157 |             logs_name = (logs_name_prefix + key) or "logs"
158 |             with open(self.path_rslt[key], 'w') as f:
159 |                 json.dump(self.results[key], f)
160 |             
161 |             # if self.dataset.split == 'test':
162 |             #     with open(self.path_rslt_testdev, 'w') as f:
163 |             #         json.dump(self.results_testdev, f)
164 | 
165 |             if 'test' not in self.dataset.split:
166 |                 call_to_prog = 'python -m block.models.metrics.compute_oe_accuracy '\
167 |                     + '--dir_vqa {} --dir_exp {} --dir_rslt {} --epoch {} --split {} --logs_name {} --rm {} &'\
168 |                     .format(self.dir_vqa, self.dir_exp, self.dir_rslt[key], self.engine.epoch, self.dataset.split, logs_name, self.rm_dir_rslt)
169 |                 Logger()('`'+call_to_prog+'`')
170 |                 os.system(call_to_prog)
171 | 
172 | 
173 |     def compute_tdiuc_metrics(self):
174 |         Logger()('{} of validation answers were not found in ans_to_aid'.format(self.gt_aid_not_found))
175 |         
176 |         for key in ['', '_all', '_q']:
177 |             Logger()(f'Computing TDIUC metrics for logits{key}')
178 |             accuracy = float(100*np.mean(np.array(self.pred_aids[key])==np.array(self.gt_aids)))
179 |             Logger()('Overall Traditional Accuracy is {:.2f}'.format(accuracy))
180 |             Logger().log_value('{}_epoch.tdiuc.accuracy{}'.format(self.mode, key), accuracy, should_print=False)
181 |             
182 |             types = list(set(self.gt_types))
183 |             sum_acc = []
184 |             eps = 1e-10
185 | 
186 |             Logger()('---------------------------------------')
187 |             Logger()('Not using per-answer normalization...')
188 |             for tp in types:
189 |                 acc = 100*(len(self.res_by_type[key][tp+'_t'])/len(self.res_by_type[key][tp+'_t']+self.res_by_type[key][tp+'_f']))
190 |                 sum_acc.append(acc+eps)
191 |                 Logger()(f"Accuracy {key} for class '{tp}' is {acc:.2f}")
192 |                 Logger().log_value('{}_epoch.tdiuc{}.perQuestionType.{}'.format(self.mode, key, tp), acc, should_print=False)
193 | 
194 |             acc_mpt_a = float(np.mean(np.array(sum_acc)))
195 |             Logger()('Arithmetic MPT Accuracy {} is {:.2f}'.format(key, acc_mpt_a))
196 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_a'.format(self.mode, key), acc_mpt_a, should_print=False)
197 | 
198 |             acc_mpt_h = float(stats.hmean(sum_acc))
199 |             Logger()('Harmonic MPT Accuracy {} is {:.2f}'.format(key, acc_mpt_h))
200 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_h'.format(self.mode, key), acc_mpt_h, should_print=False)
201 |             
202 |             Logger()('---------------------------------------')
203 |             Logger()('Using per-answer normalization...')
204 |             for tp in types:
205 |                 per_ans_stat = defaultdict(int)
206 |                 for g,p in zip(self.res_by_type[key][tp+'_gt'],self.res_by_type[key][tp+'_pred']):
207 |                     per_ans_stat[str(g)+'_gt']+=1
208 |                     if g==p:
209 |                         per_ans_stat[str(g)]+=1
210 |                 unq_acc = 0
211 |                 for unq_ans in set(self.res_by_type[key][tp+'_gt']):
212 |                     acc_curr_ans = per_ans_stat[str(unq_ans)]/per_ans_stat[str(unq_ans)+'_gt']
213 |                     unq_acc +=acc_curr_ans
214 |                 acc = 100*unq_acc/len(set(self.res_by_type[key][tp+'_gt']))
215 |                 sum_acc.append(acc+eps)
216 |                 Logger()("Accuracy {} for class '{}' is {:.2f}".format(key, tp, acc))
217 |                 Logger().log_value('{}_epoch.tdiuc{}.perQuestionType_norm.{}'.format(self.mode, key, tp), acc, should_print=False)
218 | 
219 |             acc_mpt_a = float(np.mean(np.array(sum_acc)))
220 |             Logger()('Arithmetic MPT Accuracy is {:.2f}'.format(acc_mpt_a))
221 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_a_norm'.format(self.mode, key), acc_mpt_a, should_print=False)
222 | 
223 |             acc_mpt_h = float(stats.hmean(sum_acc))
224 |             Logger()('Harmonic MPT Accuracy is {:.2f}'.format(acc_mpt_h))
225 |             Logger().log_value('{}_epoch.tdiuc{}.acc_mpt_h_norm'.format(self.mode, key), acc_mpt_h, should_print=False)
226 | 


--------------------------------------------------------------------------------
/cfvqa/models/networks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuleiniu/cfvqa/736795bef50db9c76818f9a08202c7f325489afd/cfvqa/models/networks/__init__.py


--------------------------------------------------------------------------------
/cfvqa/models/networks/cfvqa.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from block.models.networks.mlp import MLP
  4 | from .utils import grad_mul_const # mask_softmax, grad_reverse, grad_reverse_mask, 
  5 | 
# Small constant added inside the logarithms of CFVQA.fusion to avoid log(0).
eps = 1e-12
  7 | 
  8 | class CFVQA(nn.Module):
  9 |     """
 10 |     Wraps another model
 11 |     The original model must return a dictionnary containing the 'logits' key (predictions before softmax)
 12 |     Returns:
 13 |         - logits_vq: the original predictions of the model, i.e., NIE
 14 |         - logits_q: the predictions from the question-only branch
 15 |         - logits_v: the predictions from the vision-only branch
 16 |         - logits_all: the predictions from the ensemble model
 17 |         - logits_cfvqa: the predictions based on CF-VQA, i.e., TIE
 18 |     => Use `logits_all`, `logits_q` and `logits_v` for the loss
 19 |     """
 20 |     def __init__(self, model, output_size, classif_q, classif_v, fusion_mode, end_classif=True, is_va=True):
 21 |         super().__init__()
 22 |         self.net = model
 23 |         self.end_classif = end_classif
 24 | 
 25 |         assert fusion_mode in ['rubi', 'hm', 'sum'], "Fusion mode should be rubi/hm/sum."
 26 |         self.fusion_mode = fusion_mode
 27 |         self.is_va = is_va and (not fusion_mode=='rubi') # RUBi does not consider V->A
 28 |             
 29 |         # Q->A branch
 30 |         self.q_1 = MLP(**classif_q)
 31 |         if self.end_classif: # default: True (following RUBi)
 32 |             self.q_2 = nn.Linear(output_size, output_size)
 33 | 
 34 |         # V->A branch
 35 |         if self.is_va: # default: True (containing V->A)
 36 |             self.v_1 = MLP(**classif_v)
 37 |             if self.end_classif: # default: True (following RUBi)
 38 |                 self.v_2 = nn.Linear(output_size, output_size)
 39 | 
 40 |         self.constant = nn.Parameter(torch.tensor(0.0))
 41 | 
 42 |     def forward(self, batch):
 43 |         out = {}
 44 |         # model prediction
 45 |         net_out = self.net(batch)
 46 |         logits = net_out['logits']
 47 | 
 48 |         # Q->A branch
 49 |         q_embedding = net_out['q_emb']  # N * q_emb
 50 |         q_embedding = grad_mul_const(q_embedding, 0.0) # don't backpropagate
 51 |         q_pred = self.q_1(q_embedding)
 52 | 
 53 |         # V->A branch
 54 |         if self.is_va:
 55 |             v_embedding = net_out['v_emb']  # N * v_emb
 56 |             v_embedding = grad_mul_const(v_embedding, 0.0) # don't backpropagate
 57 |             v_pred = self.v_1(v_embedding)
 58 |         else:
 59 |             v_pred = None
 60 | 
 61 |         # both q, k and v are the facts
 62 |         z_qkv = self.fusion(logits, q_pred, v_pred, q_fact=True,  k_fact=True, v_fact=True) # te
 63 |         # q is the fact while k and v are the counterfactuals
 64 |         z_q = self.fusion(logits, q_pred, v_pred, q_fact=True,  k_fact=False, v_fact=False) # nie
 65 |         
 66 |         logits_cfvqa = z_qkv - z_q
 67 | 
 68 |         if self.end_classif:
 69 |             q_out = self.q_2(q_pred)
 70 |             if self.is_va:
 71 |                 v_out = self.v_2(v_pred)
 72 |         else:
 73 |             q_out = q_pred
 74 |             if self.is_va:
 75 |                 v_out = v_pred
 76 | 
 77 |         out['logits_all'] = z_qkv # for optimization
 78 |         out['logits_vq']  = logits # predictions of the original VQ branch, i.e., NIE
 79 |         out['logits_cfvqa'] = logits_cfvqa # predictions of CFVQA, i.e., TIE
 80 |         out['logits_q'] = q_out # for optimization
 81 |         if self.is_va:
 82 |             out['logits_v'] = v_out # for optimization
 83 | 
 84 |         if self.is_va:
 85 |             out['z_nde'] = self.fusion(logits.clone().detach(), q_pred.clone().detach(), v_pred.clone().detach(), q_fact=True,  k_fact=False, v_fact=False) # tie
 86 |         else:
 87 |             out['z_nde'] = self.fusion(logits.clone().detach(), q_pred.clone().detach(), None, q_fact=True,  k_fact=False, v_fact=False) # tie
 88 |         
 89 |         return out
 90 | 
 91 |     def process_answers(self, out, key=''):
 92 |         out = self.net.process_answers(out, key='_all')
 93 |         out = self.net.process_answers(out, key='_vq')
 94 |         out = self.net.process_answers(out, key='_cfvqa')
 95 |         out = self.net.process_answers(out, key='_q')
 96 |         if self.is_va:
 97 |             out = self.net.process_answers(out, key='_v')
 98 |         return out
 99 | 
100 |     def fusion(self, z_k, z_q, z_v, q_fact=False, k_fact=False, v_fact=False):
101 | 
102 |         z_k, z_q, z_v = self.transform(z_k, z_q, z_v, q_fact, k_fact, v_fact)
103 | 
104 |         if self.fusion_mode == 'rubi':
105 |             z = z_k * torch.sigmoid(z_q)
106 | 
107 |         elif self.fusion_mode == 'hm':
108 |             if self.is_va:
109 |                 z = z_k * z_q * z_v
110 |             else:
111 |                 z = z_k * z_q
112 |             z = torch.log(z + eps) - torch.log1p(z)
113 | 
114 |         elif self.fusion_mode == 'sum':
115 |             if self.is_va:
116 |                 z = z_k + z_q + z_v
117 |             else:
118 |                 z = z_k + z_q
119 |             z = torch.log(torch.sigmoid(z) + eps)
120 | 
121 |         return z
122 | 
123 |     def transform(self, z_k, z_q, z_v, q_fact=False, k_fact=False, v_fact=False):  
124 | 
125 |         if not k_fact:
126 |             z_k = self.constant * torch.ones_like(z_k).cuda()
127 | 
128 |         if not q_fact:
129 |             z_q = self.constant * torch.ones_like(z_q).cuda()
130 | 
131 |         if self.is_va:
132 |             if not v_fact:
133 |                 z_v = self.constant * torch.ones_like(z_v).cuda()
134 | 
135 |         if self.fusion_mode == 'hm':
136 |             z_k = torch.sigmoid(z_k)
137 |             z_q = torch.sigmoid(z_q)
138 |             if self.is_va:
139 |                 z_v = torch.sigmoid(z_v)
140 | 
141 |         return z_k, z_q, z_v


--------------------------------------------------------------------------------
/cfvqa/models/networks/factory.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import copy
 3 | import torch
 4 | import torch.nn as nn
 5 | import os
 6 | import json
 7 | from bootstrap.lib.options import Options
 8 | from bootstrap.models.networks.data_parallel import DataParallel
 9 | from block.models.networks.vqa_net import VQANet as AttentionNet
10 | from bootstrap.lib.logger import Logger
11 | 
12 | from .rubi import RUBiNet
13 | from .cfvqa import CFVQA
14 | 
def factory(engine):
    """Build the network described by the `model.network` options.

    The baseline network class is picked from `base` ('smrl', 'updn' or
    'san'), instantiated with the vocabularies of the first dataset found in
    `engine.dataset`, and then optionally wrapped according to `name`
    ('baseline', 'rubi', 'cfvqa' or 'cfvqasimple'). When CUDA is enabled and
    several devices are visible, the result is wrapped in `DataParallel`.

    Raises:
        ValueError: if `base` or `name` is not one of the supported values.
    """
    first_mode = list(engine.dataset.keys())[0]
    dataset = engine.dataset[first_mode]
    opt = Options()['model.network']

    # The baseline modules are imported lazily, only for the selected base.
    base = opt['base']
    if base == 'smrl':
        from .smrl_net import SMRLNet as BaselineNet
    elif base == 'updn':
        from .updn_net import UpDnNet as BaselineNet
    elif base == 'san':
        from .san_net import SANNet as BaselineNet
    else:
        raise ValueError(opt['base'])

    baseline_kwargs = dict(
        txt_enc=opt['txt_enc'],
        self_q_att=opt['self_q_att'],
        agg=opt['agg'],
        classif=opt['classif'],
        wid_to_word=dataset.wid_to_word,
        word_to_wid=dataset.word_to_wid,
        aid_to_ans=dataset.aid_to_ans,
        ans_to_aid=dataset.ans_to_aid,
        fusion=opt['fusion'],
        residual=opt['residual'],
        q_single=opt['q_single'],
    )
    orig_net = BaselineNet(**baseline_kwargs)

    name = opt['name']
    if name == 'baseline':
        net = orig_net
    elif name == 'rubi':
        net = RUBiNet(
            model=orig_net,
            output_size=len(dataset.aid_to_ans),
            classif=opt['rubi_params']['mlp_q'],
        )
    elif name in ('cfvqa', 'cfvqasimple'):
        # 'cfvqasimple' is CFVQA without the vision-only (V->A) branch.
        with_va = name == 'cfvqa'
        net = CFVQA(
            model=orig_net,
            output_size=len(dataset.aid_to_ans),
            classif_q=opt['cfvqa_params']['mlp_q'],
            classif_v=opt['cfvqa_params']['mlp_v'] if with_va else None,
            fusion_mode=opt['fusion_mode'],
            is_va=with_va,
        )
    else:
        raise ValueError(opt['name'])

    use_data_parallel = Options()['misc.cuda'] and torch.cuda.device_count() > 1
    if use_data_parallel:
        net = DataParallel(net)

    return net
81 | 
82 | 


--------------------------------------------------------------------------------
/cfvqa/models/networks/rubi.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from block.models.networks.mlp import MLP
 4 | from .utils import grad_mul_const # mask_softmax, grad_reverse, grad_reverse_mask, 
 5 | 
 6 | 
 7 | class RUBiNet(nn.Module):
 8 |     """
 9 |     Wraps another model
10 |     The original model must return a dictionnary containing the 'logits' key (predictions before softmax)
11 |     Returns:
12 |         - logits: the original predictions of the model
13 |         - logits_q: the predictions from the question-only branch
14 |         - logits_rubi: the updated predictions from the model by the mask.
15 |     => Use `logits_rubi` and `logits_q` for the loss
16 |     """
17 |     def __init__(self, model, output_size, classif, end_classif=True):
18 |         super().__init__()
19 |         self.net = model
20 |         self.c_1 = MLP(**classif)
21 |         self.end_classif = end_classif
22 |         if self.end_classif:
23 |             self.c_2 = nn.Linear(output_size, output_size)
24 | 
25 |     def forward(self, batch):
26 |         out = {}
27 |         # model prediction
28 |         net_out = self.net(batch)
29 |         logits = net_out['logits']
30 | 
31 |         q_embedding = net_out['q_emb']  # N * q_emb
32 |         q_embedding = grad_mul_const(q_embedding, 0.0) # don't backpropagate through question encoder
33 |         q_pred = self.c_1(q_embedding)
34 |         fusion_pred = logits * torch.sigmoid(q_pred)
35 | 
36 |         if self.end_classif:
37 |             q_out = self.c_2(q_pred)
38 |         else:
39 |             q_out = q_pred
40 | 
41 |         out['logits'] = net_out['logits']
42 |         out['logits_all'] = fusion_pred
43 |         out['logits_q'] = q_out
44 |         return out
45 | 
46 |     def process_answers(self, out, key=''):
47 |         out = self.net.process_answers(out)
48 |         out = self.net.process_answers(out, key='_all')
49 |         out = self.net.process_answers(out, key='_q')
50 |         return out
51 | 


--------------------------------------------------------------------------------
/cfvqa/models/networks/san_net.py:
--------------------------------------------------------------------------------
  1 | from copy import deepcopy
  2 | import itertools
  3 | import os
  4 | import numpy as np
  5 | import scipy
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.functional as F
  9 | from bootstrap.lib.options import Options
 10 | from bootstrap.lib.logger import Logger
 11 | import block
 12 | from block.models.networks.vqa_net import factory_text_enc
 13 | from block.models.networks.mlp import MLP
 14 | 
 15 | from .utils import mask_softmax
 16 | 
 17 | from torch.nn.utils.weight_norm import weight_norm
 18 | from torch.autograd import Variable
 19 | 
 20 | class SANNet(nn.Module):
 21 | 
 22 |     def __init__(self,
 23 |             txt_enc={},
 24 |             self_q_att=False,
 25 |             agg={},
 26 |             classif={},
 27 |             wid_to_word={},
 28 |             word_to_wid={},
 29 |             aid_to_ans=[],
 30 |             ans_to_aid={},
 31 |             fusion={},
 32 |             residual=False,
 33 |             q_single=False
 34 |             ):
 35 |         super().__init__()
 36 |         self.self_q_att = self_q_att
 37 |         self.agg = agg
 38 |         assert self.agg['type'] in ['max', 'mean']
 39 |         self.classif = classif
 40 |         self.wid_to_word = wid_to_word
 41 |         self.word_to_wid = word_to_wid
 42 |         self.aid_to_ans = aid_to_ans
 43 |         self.ans_to_aid = ans_to_aid
 44 |         self.fusion = fusion
 45 |         self.residual = residual
 46 |         
 47 |         # Modules
 48 |         self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
 49 |         if self.self_q_att:
 50 |             self.q_att_linear0 = nn.Linear(2400, 512)
 51 |             self.q_att_linear1 = nn.Linear(512, 2)
 52 | 
 53 |         if q_single:
 54 |             self.txt_enc_single = self.get_text_enc(self.wid_to_word, txt_enc)
 55 |             if self.self_q_att:
 56 |                 self.q_att_linear0_single = nn.Linear(2400, 512)
 57 |                 self.q_att_linear1_single = nn.Linear(512, 2)
 58 | 
 59 |         if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
 60 |             Logger()(f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]})" 
 61 |              f"doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension.")
 62 |             self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans) 
 63 | 
 64 |         self.classif_module = MLP(**self.classif['mlp'])
 65 | 
 66 |         # UpDn
 67 |         q_dim = self.fusion['input_dims'][0]
 68 |         v_dim = self.fusion['input_dims'][1]
 69 |         output_dim = self.fusion['output_dim']
 70 |         att_size = 512
 71 |         self.v_att = Attention(v_dim, v_dim, att_size, 36, output_dim, drop_ratio=0.5)
 72 |         self.txt_enc.rnn = QuestionEmbedding(620, q_dim, 1, False, 0.0)
 73 | 
 74 |         self.q_net = FCNet([q_dim, output_dim])
 75 |         # self.v_net = FCNet([v_dim, output_dim])
 76 | 
 77 |         Logger().log_value('nparams',
 78 |             sum(p.numel() for p in self.parameters() if p.requires_grad),
 79 |             should_print=True)
 80 | 
 81 |         Logger().log_value('nparams_txt_enc',
 82 |             self.get_nparams_txt_enc(),
 83 |             should_print=True)
 84 | 
 85 |       
 86 |     def get_text_enc(self, vocab_words, options):
 87 |         """
 88 |         returns the text encoding network. 
 89 |         """
 90 |         return factory_text_enc(self.wid_to_word, options)
 91 | 
 92 |     def get_nparams_txt_enc(self):
 93 |         params = [p.numel() for p in self.txt_enc.parameters() if p.requires_grad]
 94 |         if self.self_q_att:
 95 |             params += [p.numel() for p in self.q_att_linear0.parameters() if p.requires_grad]
 96 |             params += [p.numel() for p in self.q_att_linear1.parameters() if p.requires_grad]
 97 |         return sum(params)
 98 | 
 99 |     def forward(self, batch):
100 |         v = batch['visual']
101 |         q = batch['question']
102 |         l = batch['lengths'].data
103 |         c = batch['norm_coord']
104 |         nb_regions = batch.get('nb_regions')
105 | 
106 |         out = {}
107 | 
108 |         q_emb = self.process_question(q, l,)
109 |         out['v_emb'] = v.mean(1)
110 |         out['q_emb'] = q_emb
111 | 
112 |         # single txt encoder
113 |         if self.txt_enc_single is not None:
114 |             out['q_emb'] = self.process_question(q, l, self.txt_enc_single, self.q_att_linear0_single, self.q_att_linear1_single)
115 | 
116 |         # New
117 |         q_repr = self.q_net(q_emb)
118 |         joint_repr = self.v_att(q_repr, v)
119 | 
120 |         logits = self.classif_module(joint_repr)
121 |         out['logits'] = logits
122 | 
123 |         return out
124 | 
125 |     def process_question(self, q, l, txt_enc=None, q_att_linear0=None, q_att_linear1=None):
126 |         if txt_enc is None:
127 |             txt_enc = self.txt_enc
128 |         q_emb = txt_enc.embedding(q)
129 |         q = txt_enc.rnn(q_emb)
130 |         return q
131 | 
132 |     def process_answers(self, out, key=''):
133 |         batch_size = out[f'logits{key}'].shape[0]
134 |         _, pred = out[f'logits{key}'].data.max(1)
135 |         pred.squeeze_()
136 |         if batch_size != 1:
137 |             out[f'answers{key}'] = [self.aid_to_ans[pred[i].item()] for i in range(batch_size)]
138 |             out[f'answer_ids{key}'] = [pred[i].item() for i in range(batch_size)]
139 |         else:
140 |             out[f'answers{key}'] = [self.aid_to_ans[pred.item()]]
141 |             out[f'answer_ids{key}'] = [pred.item()]
142 |         return out
143 | 
class Attention(nn.Module):
    """Two-stack question-guided attention over image region features.

    Each stack scores every region from the sum of a projected query and the
    projected region features, normalises the scores with a softmax over the
    regions, and adds the attention-weighted region feature to the query.
    The query and the region features must have the same last dimension
    (`v_dim`), because the attended feature is added to the query.
    """
    def __init__(self, v_dim, q_dim, att_size, img_seq_size, output_size, drop_ratio):
        """
        Args:
            v_dim: size of a region feature.
            q_dim: size of the query fed to the projections.
            att_size: size of the attention hidden space.
            img_seq_size: nominal number of regions; stored for reference,
                the actual number is read from the input in `forward`.
            output_size: output size of `self.fc` (not used in `forward`).
            drop_ratio: dropout probability used in the second stack.
        """
        super(Attention, self).__init__() # Must call super __init__()
        self.v_dim = v_dim
        self.q_dim = q_dim
        self.att_size = att_size
        self.img_seq_size = img_seq_size
        self.output_size = output_size
        self.drop_ratio = drop_ratio

        self.tan = nn.Tanh()
        self.dp = nn.Dropout(drop_ratio)
        # Explicit dim: the scores are always reshaped to [batch, regions], and
        # relying on the implicit dimension choice of nn.Softmax is deprecated.
        self.sf = nn.Softmax(dim=1)

        self.fc11 = nn.Linear(q_dim, 768, bias=True)
        self.fc111 = nn.Linear(768, att_size, bias=True)
        self.fc12 = nn.Linear(v_dim, 768, bias=False)
        self.fc121 = nn.Linear(768, att_size, bias=False)
        self.linear_second = nn.Linear(att_size, att_size, bias=False)
        self.fc13 = nn.Linear(att_size, 1, bias=True)

        self.fc21 = nn.Linear(q_dim, att_size, bias=True)
        self.fc22 = nn.Linear(v_dim, att_size, bias=False)
        self.fc23 = nn.Linear(att_size, 1, bias=True)

        # Kept so that existing checkpoints still load; unused in forward.
        self.fc = nn.Linear(v_dim, output_size, bias=True)

    def forward(self, ques_feat, img_feat):
        """Attend twice over the regions and return the refined query.

        Args:
            ques_feat: query of shape [batch, d].
            img_feat: region features of shape [batch, m, d]; `m` may differ
                from `self.img_seq_size`.

        Returns:
            Tensor of shape [batch, d].
        """
        B = ques_feat.size(0)

        # Stack 1
        ques_emb_1 = self.fc11(ques_feat)
        ques_emb_1 = self.fc111(ques_emb_1) # [batch_size, att_size]
        img_emb_1 = self.fc12(img_feat)
        img_emb_1 = self.fc121(img_emb_1) # [batch_size, m, att_size]

        h1 = self.tan(ques_emb_1.view(B, 1, self.att_size) + img_emb_1)
        h1_emb = self.linear_second(h1)
        h1_emb = self.fc13(h1_emb) # [batch_size, m, 1]

        # Softmax over the regions; the region count comes from the input.
        p1 = self.sf(h1_emb.view(B, -1)).view(B, 1, -1)

        # Weighted sum
        img_att1 = p1.matmul(img_feat)
        u1 = ques_feat + img_att1.view(-1, self.v_dim)

        # Stack 2
        ques_emb_2 = self.fc21(u1)  # [batch_size, att_size]
        img_emb_2 = self.fc22(img_feat)

        h2 = self.tan(ques_emb_2.view(B, 1, self.att_size) + img_emb_2)

        h2_emb = self.fc23(self.dp(h2))
        p2 = self.sf(h2_emb.view(B, -1)).view(B, 1, -1)

        # Weighted sum
        img_att2 = p2.matmul(img_feat)
        u2 = u1 + img_att2.view(-1, self.v_dim)

        return u2
216 | 
class FCNet(nn.Module):
    """Stack of weight-normalised linear layers, each followed by a ReLU.

    `dims` lists the layer sizes: `dims[0]` is the input size and every next
    entry is the output size of one linear layer. At least two entries are
    required.
    """
    def __init__(self, dims):
        super(FCNet, self).__init__()

        def block(n_in, n_out):
            # One weight-normalised linear layer with its activation.
            return [weight_norm(nn.Linear(n_in, n_out), dim=None), nn.ReLU()]

        modules = []
        # Hidden layers first, then the last layer (which also ends in a ReLU).
        for n_in, n_out in zip(dims[:-2], dims[1:-1]):
            modules.extend(block(n_in, n_out))
        modules.extend(block(dims[-2], dims[-1]))

        self.main = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the stack to `x`."""
        return self.main(x)
236 | 
237 |         
class QuestionEmbedding(nn.Module):
    """Recurrent (GRU or LSTM) encoder for embedded question tokens."""

    def __init__(self, in_dim, num_hid, nlayers, bidirect, dropout, rnn_type='GRU'):
        """
        Args:
            in_dim: size of each input token embedding.
            num_hid: hidden size per direction.
            nlayers: number of stacked recurrent layers.
            bidirect: use a bidirectional rnn.
            dropout: dropout passed to the recurrent module.
            rnn_type: 'GRU' or 'LSTM'.
        """
        super(QuestionEmbedding, self).__init__()
        assert rnn_type in ('LSTM', 'GRU')
        recurrent_classes = {'LSTM': nn.LSTM, 'GRU': nn.GRU}

        self.rnn = recurrent_classes[rnn_type](
            in_dim, num_hid, nlayers,
            bidirectional=bidirect,
            dropout=dropout,
            batch_first=True)

        self.in_dim = in_dim
        self.num_hid = num_hid
        self.nlayers = nlayers
        self.rnn_type = rnn_type
        self.ndirections = 2 if bidirect else 1

    def init_hidden(self, batch):
        """Return zero initial state(s) for a batch of size `batch`.

        The tensors get the dtype and device of the module's first parameter.
        An LSTM gets a (hidden, cell) pair, a GRU a single tensor.
        """
        reference = next(self.parameters()).data
        state_shape = (self.nlayers * self.ndirections, batch, self.num_hid)
        if self.rnn_type == 'LSTM':
            return (reference.new_zeros(state_shape),
                    reference.new_zeros(state_shape))
        return reference.new_zeros(state_shape)

    def forward(self, x):
        """Encode `x` ([batch, sequence, in_dim]) into one vector per item.

        Unidirectional: the output at the last time step. Bidirectional: the
        forward half of the last step concatenated with the backward half of
        the first step.
        """
        states = self.forward_all(x)

        if self.ndirections == 1:
            return states[:, -1]

        last_forward = states[:, -1, :self.num_hid]
        first_backward = states[:, 0, self.num_hid:]
        return torch.cat((last_forward, first_backward), dim=1)

    def forward_all(self, x):
        """Return the rnn output at every time step for `x`.

        `x` is [batch, sequence, in_dim].
        """
        initial_state = self.init_hidden(x.size(0))
        self.rnn.flatten_parameters()
        states, _ = self.rnn(x, initial_state)
        return states
289 | 


--------------------------------------------------------------------------------
/cfvqa/models/networks/smrl_net.py:
--------------------------------------------------------------------------------
  1 | from copy import deepcopy
  2 | import itertools
  3 | import os
  4 | import numpy as np
  5 | import scipy
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.functional as F
  9 | from bootstrap.lib.options import Options
 10 | from bootstrap.lib.logger import Logger
 11 | import block
 12 | from block.models.networks.vqa_net import factory_text_enc
 13 | from block.models.networks.mlp import MLP
 14 | 
 15 | from .utils import mask_softmax
 16 | 
 17 | class SMRLNet(nn.Module):
 18 | 
 19 |     def __init__(self,
 20 |             txt_enc={},
 21 |             self_q_att=False,
 22 |             agg={},
 23 |             classif={},
 24 |             wid_to_word={},
 25 |             word_to_wid={},
 26 |             aid_to_ans=[],
 27 |             ans_to_aid={},
 28 |             fusion={},
 29 |             residual=False,
 30 |             q_single=False,
 31 |             ):
 32 |         super().__init__()
 33 |         self.self_q_att = self_q_att
 34 |         self.agg = agg
 35 |         assert self.agg['type'] in ['max', 'mean']
 36 |         self.classif = classif
 37 |         self.wid_to_word = wid_to_word
 38 |         self.word_to_wid = word_to_wid
 39 |         self.aid_to_ans = aid_to_ans
 40 |         self.ans_to_aid = ans_to_aid
 41 |         self.fusion = fusion
 42 |         self.residual = residual
 43 |         
 44 |         # Modules
 45 |         self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
 46 |         if self.self_q_att:
 47 |             self.q_att_linear0 = nn.Linear(2400, 512)
 48 |             self.q_att_linear1 = nn.Linear(512, 2)
 49 | 
 50 |         if q_single:
 51 |             self.txt_enc_single = self.get_text_enc(self.wid_to_word, txt_enc)
 52 |             if self.self_q_att:
 53 |                 self.q_att_linear0_single = nn.Linear(2400, 512)
 54 |                 self.q_att_linear1_single = nn.Linear(512, 2)
 55 |         else:
 56 |             self.txt_enc_single = None
 57 | 
 58 |         self.fusion_module = block.factory_fusion(self.fusion)
 59 | 
 60 |         if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
 61 |             Logger()(f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]})" 
 62 |              f"doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension.")
 63 |             self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans) 
 64 | 
 65 |         self.classif_module = MLP(**self.classif['mlp'])
 66 | 
 67 |         Logger().log_value('nparams',
 68 |             sum(p.numel() for p in self.parameters() if p.requires_grad),
 69 |             should_print=True)
 70 | 
 71 |         Logger().log_value('nparams_txt_enc',
 72 |             self.get_nparams_txt_enc(),
 73 |             should_print=True)
 74 | 
 75 |       
 76 |     def get_text_enc(self, vocab_words, options):
 77 |         """
 78 |         returns the text encoding network. 
 79 |         """
 80 |         return factory_text_enc(self.wid_to_word, options)
 81 | 
 82 |     def get_nparams_txt_enc(self):
 83 |         params = [p.numel() for p in self.txt_enc.parameters() if p.requires_grad]
 84 |         if self.self_q_att:
 85 |             params += [p.numel() for p in self.q_att_linear0.parameters() if p.requires_grad]
 86 |             params += [p.numel() for p in self.q_att_linear1.parameters() if p.requires_grad]
 87 |         return sum(params)
 88 | 
 89 |     def process_fusion(self, q, mm):
 90 |         bsize = mm.shape[0]
 91 |         n_regions = mm.shape[1]
 92 | 
 93 |         mm = mm.contiguous().view(bsize*n_regions, -1)
 94 |         mm = self.fusion_module([q, mm])
 95 |         mm = mm.view(bsize, n_regions, -1)
 96 |         return mm
 97 | 
 98 |     def forward(self, batch):
 99 |         v = batch['visual']
100 |         q = batch['question']
101 |         l = batch['lengths'].data
102 |         c = batch['norm_coord']
103 |         nb_regions = batch.get('nb_regions')
104 |         bsize = v.shape[0]
105 |         n_regions = v.shape[1]
106 | 
107 |         out = {}
108 | 
109 |         q = self.process_question(q, l,)
110 |         out['q_emb'] = q
111 |         q_expand = q[:,None,:].expand(bsize, n_regions, q.shape[1])
112 |         q_expand = q_expand.contiguous().view(bsize*n_regions, -1)
113 | 
114 |         # single txt encoder
115 |         if self.txt_enc_single is not None:
116 |             out['q_emb'] = self.process_question(q, l, self.txt_enc_single, self.q_att_linear0_single, self.q_att_linear1_single)
117 | 
118 |         mm = self.process_fusion(q_expand, v,)
119 | 
120 |         if self.residual:
121 |             mm = v + mm
122 | 
123 |         if self.agg['type'] == 'max':
124 |             mm, mm_argmax = torch.max(mm, 1)
125 |         elif self.agg['type'] == 'mean':
126 |             mm = mm.mean(1)
127 | 
128 |         out['v_emb'] = v.mean(1)
129 |         out['mm'] = mm
130 |         out['mm_argmax'] = mm_argmax
131 | 
132 |         logits = self.classif_module(mm)
133 |         out['logits'] = logits
134 |         return out
135 | 
136 |     def process_question(self, q, l, txt_enc=None, q_att_linear0=None, q_att_linear1=None):
137 |         if txt_enc is None:
138 |             txt_enc = self.txt_enc
139 |         if q_att_linear0 is None:
140 |             q_att_linear0 = self.q_att_linear0
141 |         if q_att_linear1 is None:
142 |             q_att_linear1 = self.q_att_linear1
143 |         q_emb = txt_enc.embedding(q)
144 | 
145 |         q, _ = txt_enc.rnn(q_emb)
146 | 
147 |         if self.self_q_att:
148 |             q_att = q_att_linear0(q)
149 |             q_att = F.relu(q_att)
150 |             q_att = q_att_linear1(q_att)
151 |             q_att = mask_softmax(q_att, l)
152 |             #self.q_att_coeffs = q_att
153 |             if q_att.size(2) > 1:
154 |                 q_atts = torch.unbind(q_att, dim=2)
155 |                 q_outs = []
156 |                 for q_att in q_atts:
157 |                     q_att = q_att.unsqueeze(2)
158 |                     q_att = q_att.expand_as(q)
159 |                     q_out = q_att*q
160 |                     q_out = q_out.sum(1)
161 |                     q_outs.append(q_out)
162 |                 q = torch.cat(q_outs, dim=1)
163 |             else:
164 |                 q_att = q_att.expand_as(q)
165 |                 q = q_att * q
166 |                 q = q.sum(1)
167 |         else:
168 |             # l contains the number of words for each question
169 |             # in case of multi-gpus it must be a Tensor
170 |             # thus we convert it into a list during the forward pass
171 |             l = list(l.data[:,0])
172 |             q = txt_enc._select_last(q, l)
173 | 
174 |         return q
175 | 
176 |     def process_answers(self, out, key=''):
177 |         batch_size = out[f'logits{key}'].shape[0]
178 |         _, pred = out[f'logits{key}'].data.max(1)
179 |         pred.squeeze_()
180 |         if batch_size != 1:
181 |             out[f'answers{key}'] = [self.aid_to_ans[pred[i].item()] for i in range(batch_size)]
182 |             out[f'answer_ids{key}'] = [pred[i].item() for i in range(batch_size)]
183 |         else:
184 |             out[f'answers{key}'] = [self.aid_to_ans[pred.item()]]
185 |             out[f'answer_ids{key}'] = [pred.item()]
186 |         return out
187 | 


--------------------------------------------------------------------------------
/cfvqa/models/networks/updn_net.py:
--------------------------------------------------------------------------------
  1 | from copy import deepcopy
  2 | import itertools
  3 | import os
  4 | import numpy as np
  5 | import scipy
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.functional as F
  9 | from bootstrap.lib.options import Options
 10 | from bootstrap.lib.logger import Logger
 11 | import block
 12 | from block.models.networks.vqa_net import factory_text_enc
 13 | from block.models.networks.mlp import MLP
 14 | 
 15 | from .utils import mask_softmax
 16 | 
 17 | from torch.nn.utils.weight_norm import weight_norm
 18 | 
 19 | class UpDnNet(nn.Module):
 20 | 
 21 |     def __init__(self,
 22 |             txt_enc={},
 23 |             self_q_att=False,
 24 |             agg={},
 25 |             classif={},
 26 |             wid_to_word={},
 27 |             word_to_wid={},
 28 |             aid_to_ans=[],
 29 |             ans_to_aid={},
 30 |             fusion={},
 31 |             residual=False,
 32 |             q_single=False,
 33 |             ):
 34 |         super().__init__()
 35 |         self.self_q_att = self_q_att
 36 |         self.agg = agg
 37 |         assert self.agg['type'] in ['max', 'mean']
 38 |         self.classif = classif
 39 |         self.wid_to_word = wid_to_word
 40 |         self.word_to_wid = word_to_wid
 41 |         self.aid_to_ans = aid_to_ans
 42 |         self.ans_to_aid = ans_to_aid
 43 |         self.fusion = fusion
 44 |         self.residual = residual
 45 |         
 46 |         # Modules
 47 |         self.txt_enc = self.get_text_enc(self.wid_to_word, txt_enc)
 48 |         if self.self_q_att:
 49 |             self.q_att_linear0 = nn.Linear(2400, 512)
 50 |             self.q_att_linear1 = nn.Linear(512, 2)
 51 | 
 52 |         if q_single:
 53 |             self.txt_enc_single = self.get_text_enc(self.wid_to_word, txt_enc)
 54 |             if self.self_q_att:
 55 |                 self.q_att_linear0_single = nn.Linear(2400, 512)
 56 |                 self.q_att_linear1_single = nn.Linear(512, 2)
 57 |         else:
 58 |             self.txt_enc_single = None
 59 | 
 60 |         if self.classif['mlp']['dimensions'][-1] != len(self.aid_to_ans):
 61 |             Logger()(f"Warning, the classif_mm output dimension ({self.classif['mlp']['dimensions'][-1]})" 
 62 |              f"doesn't match the number of answers ({len(self.aid_to_ans)}). Modifying the output dimension.")
 63 |             self.classif['mlp']['dimensions'][-1] = len(self.aid_to_ans) 
 64 | 
 65 |         self.classif_module = MLP(**self.classif['mlp'])
 66 | 
 67 |         # UpDn
 68 |         q_dim = self.fusion['input_dims'][0]
 69 |         v_dim = self.fusion['input_dims'][1]
 70 |         output_dim = self.fusion['output_dim']
 71 |         self.v_att = Attention(v_dim, q_dim, output_dim)
 72 |         self.q_net = FCNet([q_dim, output_dim])
 73 |         self.v_net = FCNet([v_dim, output_dim])
 74 | 
 75 |         Logger().log_value('nparams',
 76 |             sum(p.numel() for p in self.parameters() if p.requires_grad),
 77 |             should_print=True)
 78 | 
 79 |         Logger().log_value('nparams_txt_enc',
 80 |             self.get_nparams_txt_enc(),
 81 |             should_print=True)
 82 | 
 83 |       
 84 |     def get_text_enc(self, vocab_words, options):
 85 |         """
 86 |         returns the text encoding network. 
 87 |         """
 88 |         return factory_text_enc(self.wid_to_word, options)
 89 | 
 90 |     def get_nparams_txt_enc(self):
 91 |         params = [p.numel() for p in self.txt_enc.parameters() if p.requires_grad]
 92 |         if self.self_q_att:
 93 |             params += [p.numel() for p in self.q_att_linear0.parameters() if p.requires_grad]
 94 |             params += [p.numel() for p in self.q_att_linear1.parameters() if p.requires_grad]
 95 |         return sum(params)
 96 | 
 97 |     def forward(self, batch):
 98 |         v = batch['visual']
 99 |         q = batch['question']
100 |         l = batch['lengths'].data
101 |         c = batch['norm_coord']
102 |         nb_regions = batch.get('nb_regions')
103 | 
104 |         out = {}
105 | 
106 |         q_emb = self.process_question(q, l,)
107 |         out['v_emb'] = v.mean(1)
108 |         out['q_emb'] = q_emb
109 |         
110 |         # single txt encoder
111 |         if self.txt_enc_single is not None:
112 |             out['q_emb'] = self.process_question(q, l, self.txt_enc_single, self.q_att_linear0_single, self.q_att_linear1_single)
113 | 
114 |         # New
115 |         att = self.v_att(v, q_emb)
116 |         v_emb = (att * v).sum(1)
117 |         q_repr = self.q_net(q_emb)
118 |         v_repr = self.v_net(v_emb)
119 |         joint_repr = q_repr * v_repr
120 | 
121 |         logits = self.classif_module(joint_repr)
122 |         out['logits'] = logits
123 | 
124 |         return out
125 | 
126 |     def process_question(self, q, l, txt_enc=None, q_att_linear0=None, q_att_linear1=None):
127 |         if txt_enc is None:
128 |             txt_enc = self.txt_enc
129 |         if q_att_linear0 is None:
130 |             q_att_linear0 = self.q_att_linear0
131 |         if q_att_linear1 is None:
132 |             q_att_linear1 = self.q_att_linear1
133 |         q_emb = txt_enc.embedding(q)
134 | 
135 |         q, _ = txt_enc.rnn(q_emb)
136 | 
137 |         if self.self_q_att:
138 |             q_att = q_att_linear0(q)
139 |             q_att = F.relu(q_att)
140 |             q_att = q_att_linear1(q_att)
141 |             q_att = mask_softmax(q_att, l)
142 |             #self.q_att_coeffs = q_att
143 |             if q_att.size(2) > 1:
144 |                 q_atts = torch.unbind(q_att, dim=2)
145 |                 q_outs = []
146 |                 for q_att in q_atts:
147 |                     q_att = q_att.unsqueeze(2)
148 |                     q_att = q_att.expand_as(q)
149 |                     q_out = q_att*q
150 |                     q_out = q_out.sum(1)
151 |                     q_outs.append(q_out)
152 |                 q = torch.cat(q_outs, dim=1)
153 |             else:
154 |                 q_att = q_att.expand_as(q)
155 |                 q = q_att * q
156 |                 q = q.sum(1)
157 |         else:
158 |             # l contains the number of words for each question
159 |             # in case of multi-gpus it must be a Tensor
160 |             # thus we convert it into a list during the forward pass
161 |             l = list(l.data[:,0])
162 |             q = txt_enc._select_last(q, l)
163 | 
164 |         return q
165 | 
166 |     def process_answers(self, out, key=''):
167 |         batch_size = out[f'logits{key}'].shape[0]
168 |         _, pred = out[f'logits{key}'].data.max(1)
169 |         pred.squeeze_()
170 |         if batch_size != 1:
171 |             out[f'answers{key}'] = [self.aid_to_ans[pred[i].item()] for i in range(batch_size)]
172 |             out[f'answer_ids{key}'] = [pred[i].item() for i in range(batch_size)]
173 |         else:
174 |             out[f'answers{key}'] = [self.aid_to_ans[pred.item()]]
175 |             out[f'answer_ids{key}'] = [pred.item()]
176 |         return out
177 | 
class Attention(nn.Module):
    """Attention weights over a set of visual vectors, driven by a question."""

    def __init__(self, v_dim, q_dim, num_hid, dropout=0.2):
        """Create the two projections, the dropout and the scoring layer.

        Both inputs are projected to `num_hid` features; the scoring layer
        maps `num_hid` features to a single value.
        """
        super(Attention, self).__init__()

        self.v_proj = FCNet([v_dim, num_hid])
        self.q_proj = FCNet([q_dim, num_hid])
        self.dropout = nn.Dropout(dropout)
        self.linear = weight_norm(nn.Linear(num_hid, 1), dim=None)

    def forward(self, v, q):
        """
        v: [batch, k, vdim]
        q: [batch, qdim]

        Returns the scores of `logits` normalised with a softmax over dim 1.
        """
        scores = self.logits(v, q)
        return torch.softmax(scores, dim=1)

    def logits(self, v, q):
        """Return one unnormalised score per entry along dim 1 of `v`."""
        n_regions = v.size(1)
        projected_v = self.v_proj(v)  # [batch, k, num_hid]
        # Copy the projected question once per entry of dim 1 of v.
        projected_q = self.q_proj(q).unsqueeze(1).repeat(1, n_regions, 1)
        fused = self.dropout(projected_v * projected_q)
        return self.linear(fused)
204 | 
class FCNet(nn.Module):
    """Stack of weight-normalised linear layers, each followed by a ReLU.

    `dims` lists the layer sizes: dims[0] is the input size, dims[-1] the
    output size. At least two sizes are needed.
    """
    def __init__(self, dims):
        super(FCNet, self).__init__()

        # Reading the last pair first keeps the IndexError raised when fewer
        # than two sizes are given.
        last_pair = (dims[-2], dims[-1])
        pairs = [(dims[i], dims[i + 1]) for i in range(len(dims) - 2)]
        pairs.append(last_pair)

        modules = []
        for n_in, n_out in pairs:
            modules += [weight_norm(nn.Linear(n_in, n_out), dim=None), nn.ReLU()]

        self.main = nn.Sequential(*modules)

    def forward(self, x):
        """Apply every layer of the stack to `x`."""
        return self.main(x)


--------------------------------------------------------------------------------
/cfvqa/models/networks/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | def mask_softmax(x, lengths):#, dim=1)
 4 |     mask = torch.zeros_like(x).to(device=x.device, non_blocking=True)
 5 |     t_lengths = lengths[:,:,None].expand_as(mask)
 6 |     arange_id = torch.arange(mask.size(1)).to(device=x.device, non_blocking=True)
 7 |     arange_id = arange_id[None,:,None].expand_as(mask)
 8 | 
 9 |     mask[arange_id<t_lengths] = 1
10 |     # https://stackoverflow.com/questions/42599498/numercially-stable-softmax
11 |     # https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python
12 |     # exp(x - max(x)) instead of exp(x) is a trick
13 |     # to improve the numerical stability while giving
14 |     # the same outputs
15 |     x2 = torch.exp(x - torch.max(x))
16 |     x3 = x2 * mask
17 |     epsilon = 1e-5
18 |     x3_sum = torch.sum(x3, dim=1, keepdim=True) + epsilon
19 |     x4 = x3 / x3_sum.expand_as(x3)
20 |     return x4
21 | 
22 | 
class GradReverseMask(torch.autograd.Function):
    """
    Identity in the forward pass; in the backward pass the gradient of the
    masked rows is multiplied by `-weight`.
    """
    @staticmethod
    def forward(ctx, x, mask, weight):
        """
        The mask should be composed of 0 or 1.
        The rows whose mask entry is 1 get their gradient reversed and
        scaled by `weight`.
        """
        ctx.weight = weight
        ctx.save_for_backward(mask)
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        """Scale the gradient per row: 1 where mask == 0, -weight where mask == 1."""
        (mask,) = ctx.saved_tensors
        # Both comparisons use the original mask, so the two fills cannot
        # interfere with each other.
        scale = mask.clone().detach().float()
        scale.masked_fill_(mask == 0, 1.0)
        scale.masked_fill_(mask == 1, -float(ctx.weight))
        # No gradient flows to `mask` or `weight`.
        return grad_output * scale[:, None].float(), None, None


def grad_reverse_mask(x, mask, weight=1):
    """Functional form of `GradReverseMask`."""
    return GradReverseMask.apply(x, mask, weight)
49 | 
50 | 
class GradReverse(torch.autograd.Function):
    """
    Identity in the forward pass, negated gradient in the backward pass.
    This layer is used to create an adversarial loss.
    """
    @staticmethod
    def forward(ctx, x):
        """Return `x` unchanged (as a view)."""
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        """Flip the sign of the incoming gradient."""
        reversed_grad = grad_output.neg()
        return reversed_grad

def grad_reverse(x):
    """Functional form of `GradReverse`."""
    return GradReverse.apply(x)
65 | 
66 | 
67 | 
class GradMulConst(torch.autograd.Function):
    """
    Identity in the forward pass; the gradient is multiplied by a constant
    in the backward pass.
    """
    @staticmethod
    def forward(ctx, x, const):
        """Remember `const` for the backward pass and return `x` unchanged."""
        ctx.const = const
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        """Scale the incoming gradient; `const` itself receives no gradient."""
        scaled_grad = grad_output * ctx.const
        return scaled_grad, None

def grad_mul_const(x, const):
    """Functional form of `GradMulConst`."""
    return GradMulConst.apply(x, const)
83 | 


--------------------------------------------------------------------------------
/cfvqa/optimizers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuleiniu/cfvqa/736795bef50db9c76818f9a08202c7f325489afd/cfvqa/optimizers/__init__.py


--------------------------------------------------------------------------------
/cfvqa/optimizers/factory.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from bootstrap.lib.options import Options
 3 | from bootstrap.optimizers.factory import factory_optimizer
 4 | from block.optimizers.lr_scheduler import ReduceLROnPlateau
 5 | from block.optimizers.lr_scheduler import BanOptimizer
 6 | 
 7 | def factory(model, engine):
 8 |     opt = Options()['optimizer']
 9 | 
10 |     optimizer = BanOptimizer(engine,
11 |         name=Options()['optimizer'].get('name', 'Adamax'),
12 |         lr=Options()['optimizer']['lr'],
13 |         gradual_warmup_steps=Options()['optimizer'].get('gradual_warmup_steps', [0.5, 2.0, 4]),
14 |         lr_decay_epochs=Options()['optimizer'].get('lr_decay_epochs', [10, 20, 2]),
15 |         lr_decay_rate=Options()['optimizer'].get('lr_decay_rate', .25))
16 | 
17 |     if opt.get('lr_scheduler', None):
18 |         optimizer = ReduceLROnPlateau(optimizer, engine,
19 |             **opt['lr_scheduler'])
20 | 
21 |     if opt.get('init', None) == 'glorot':
22 |         for p in model.network.parameters():
23 |             if p.dim()==1:
24 |                 p.data.fill_(0)
25 |             elif p.dim()>=2:
26 |                 nn.init.xavier_uniform_(p.data)
27 |             else:
28 |                 raise ValueError(p.dim())
29 | 
30 |     return optimizer
31 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqa2/smrl_baseline.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqa2/smrl_baseline
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqa2 # or vqa2vg
 7 |   dir: data/vqa/vqa2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 |   vg: false
19 | model:
20 |   name: default
21 |   network:
22 |     import: cfvqa.models.networks.factory
23 |     base: smrl
24 |     name: baseline
25 |     rubi_params:
26 |       mlp_q:
27 |         input_dim: 4800
28 |         dimensions: [1024,1024,3000]
29 |     txt_enc:
30 |       name: skipthoughts
31 |       type: BayesianUniSkip
32 |       dropout: 0.25
33 |       fixed_emb: False
34 |       dir_st: data/skip-thoughts
35 |     self_q_att: True
36 |     residual: False
37 |     q_single: False
38 |     fusion:
39 |       type: block
40 |       input_dims: [4800, 2048]
41 |       output_dim: 2048
42 |       mm_dim: 1000
43 |       chunks: 20
44 |       rank: 15
45 |       dropout_input: 0.
46 |       dropout_pre_lin: 0.
47 |     agg:
48 |       type: max
49 |     classif:
50 |       mlp:
51 |         input_dim: 2048
52 |         dimensions: [1024,1024,3000]
53 |   criterion:
54 |     import: cfvqa.models.criterions.factory
55 |     name: vqa_cross_entropy
56 |   metric:
57 |     import: cfvqa.models.metrics.factory
58 |     name: vqa_accuracies
59 | optimizer:
60 |   import: cfvqa.optimizers.factory
61 |   name: Adam
62 |   lr: 0.0003
63 |   gradual_warmup_steps: [0.5, 2.0, 7.0] #torch.linspace
64 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] #torch.linspace
65 |   lr_decay_epochs: [14, 24, 2] #range
66 |   lr_decay_rate: .25
67 | engine:
68 |   name: logger
69 |   debug: False
70 |   print_freq: 10
71 |   nb_epochs: 22
72 |   saving_criteria:
73 |   - eval_epoch.accuracy_top1:max
74 | misc:
75 |   logs_name:
76 |   cuda: True
77 |   seed: 1337
78 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqa2/smrl_cfvqa_hm.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqa2/smrl_cfvqa_hm
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqa2 # or vqa2vg
 7 |   dir: data/vqa/vqa2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 |   vg: false
19 | model:
20 |   name: default
21 |   network:
22 |     import: cfvqa.models.networks.factory
23 |     base: smrl
24 |     name: cfvqa
25 |     fusion_mode: hm
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |       mlp_v:
31 |         input_dim: 2048
32 |         dimensions: [1024,1024,3000]
33 |     txt_enc:
34 |       name: skipthoughts
35 |       type: BayesianUniSkip
36 |       dropout: 0.25
37 |       fixed_emb: False
38 |       dir_st: data/skip-thoughts
39 |     self_q_att: True
40 |     residual: False
41 |     q_single: False
42 |     fusion:
43 |       type: block
44 |       input_dims: [4800, 2048]
45 |       output_dim: 2048
46 |       mm_dim: 1000
47 |       chunks: 20
48 |       rank: 15
49 |       dropout_input: 0.
50 |       dropout_pre_lin: 0.
51 |     agg:
52 |       type: max
53 |     classif:
54 |       mlp:
55 |         input_dim: 2048
56 |         dimensions: [1024,1024,3000]
57 |   criterion:
58 |     import: cfvqa.models.criterions.factory
59 |     name: cfvqa_criterion
60 |     question_loss_weight: 1.0
61 |     vision_loss_weight: 1.0
62 |   metric:
63 |     import: cfvqa.models.metrics.factory
64 |     name: vqa_cfvqa_metrics
65 | optimizer:
66 |   import: cfvqa.optimizers.factory
67 |   name: Adam
68 |   lr: 0.0003
69 |   gradual_warmup_steps: [0.5, 2.0, 7.0] #torch.linspace
70 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] #torch.linspace
71 |   lr_decay_epochs: [14, 24, 2] #range
72 |   lr_decay_rate: .25
73 | engine:
74 |   name: logger
75 |   debug: False
76 |   print_freq: 10
77 |   nb_epochs: 22
78 |   saving_criteria:
79 |   - eval_epoch.accuracy_all_top1:max
80 |   - eval_epoch.accuracy_vq_top1:max
81 |   - eval_epoch.accuracy_cfvqa_top1:max
82 | misc:
83 |   logs_name:
84 |   cuda: True
85 |   seed: 1337
86 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqa2/smrl_cfvqa_sum.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqa2/smrl_cfvqa_sum
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqa2 # or vqa2vg
 7 |   dir: data/vqa/vqa2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 |   vg: false
19 | model:
20 |   name: default
21 |   network:
22 |     import: cfvqa.models.networks.factory
23 |     base: smrl
24 |     name: cfvqa
25 |     fusion_mode: sum
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |       mlp_v:
31 |         input_dim: 2048
32 |         dimensions: [1024,1024,3000]
33 |     txt_enc:
34 |       name: skipthoughts
35 |       type: BayesianUniSkip
36 |       dropout: 0.25
37 |       fixed_emb: False
38 |       dir_st: data/skip-thoughts
39 |     self_q_att: True
40 |     residual: False
41 |     q_single: False
42 |     fusion:
43 |       type: block
44 |       input_dims: [4800, 2048]
45 |       output_dim: 2048
46 |       mm_dim: 1000
47 |       chunks: 20
48 |       rank: 15
49 |       dropout_input: 0.
50 |       dropout_pre_lin: 0.
51 |     agg:
52 |       type: max
53 |     classif:
54 |       mlp:
55 |         input_dim: 2048
56 |         dimensions: [1024,1024,3000]
57 |   criterion:
58 |     import: cfvqa.models.criterions.factory
59 |     name: cfvqa_criterion
60 |     question_loss_weight: 1.0
61 |     vision_loss_weight: 1.0
62 |   metric:
63 |     import: cfvqa.models.metrics.factory
64 |     name: vqa_cfvqa_metrics
65 | optimizer:
66 |   import: cfvqa.optimizers.factory
67 |   name: Adam
68 |   lr: 0.0003
69 |   gradual_warmup_steps: [0.5, 2.0, 7.0] #torch.linspace
70 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] #torch.linspace
71 |   lr_decay_epochs: [14, 24, 2] #range
72 |   lr_decay_rate: .25
73 | engine:
74 |   name: logger
75 |   debug: False
76 |   print_freq: 10
77 |   nb_epochs: 22
78 |   saving_criteria:
79 |   - eval_epoch.accuracy_all_top1:max
80 |   - eval_epoch.accuracy_vq_top1:max
81 |   - eval_epoch.accuracy_cfvqa_top1:max
82 | misc:
83 |   logs_name:
84 |   cuda: True
85 |   seed: 1337
86 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqa2/smrl_cfvqasimple_hm.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqa2/smrl_cfvqasimple_hm
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqa2 # or vqa2vg
 7 |   dir: data/vqa/vqa2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 |   vg: false
19 | model:
20 |   name: default
21 |   network:
22 |     import: cfvqa.models.networks.factory
23 |     base: smrl
24 |     name: cfvqasimple
25 |     fusion_mode: hm
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |     txt_enc:
31 |       name: skipthoughts
32 |       type: BayesianUniSkip
33 |       dropout: 0.25
34 |       fixed_emb: False
35 |       dir_st: data/skip-thoughts
36 |     self_q_att: True
37 |     residual: False
38 |     q_single: False
39 |     fusion:
40 |       type: block
41 |       input_dims: [4800, 2048]
42 |       output_dim: 2048
43 |       mm_dim: 1000
44 |       chunks: 20
45 |       rank: 15
46 |       dropout_input: 0.
47 |       dropout_pre_lin: 0.
48 |     agg:
49 |       type: max
50 |     classif:
51 |       mlp:
52 |         input_dim: 2048
53 |         dimensions: [1024,1024,3000]
54 |   criterion:
55 |     import: cfvqa.models.criterions.factory
56 |     name: cfvqasimple_criterion
57 |     question_loss_weight: 1.0
58 |   metric:
59 |     import: cfvqa.models.metrics.factory
60 |     name: vqa_cfvqasimple_metrics
61 | optimizer:
62 |   import: cfvqa.optimizers.factory
63 |   name: Adam
64 |   lr: 0.0003
65 |   gradual_warmup_steps: [0.5, 2.0, 7.0] #torch.linspace
66 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] #torch.linspace
67 |   lr_decay_epochs: [14, 24, 2] #range
68 |   lr_decay_rate: .25
69 | engine:
70 |   name: logger
71 |   debug: False
72 |   print_freq: 10
73 |   nb_epochs: 22
74 |   saving_criteria:
75 |   - eval_epoch.accuracy_all_top1:max
76 |   - eval_epoch.accuracy_vq_top1:max
77 |   - eval_epoch.accuracy_cfvqa_top1:max
78 | misc:
79 |   logs_name:
80 |   cuda: True
81 |   seed: 1337
82 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqa2/smrl_cfvqasimple_rubi.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqa2/smrl_cfvqasimple_rubi
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqa2 # or vqa2vg
 7 |   dir: data/vqa/vqa2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 |   vg: false
19 | model:
20 |   name: default
21 |   network:
22 |     import: cfvqa.models.networks.factory
23 |     base: smrl
24 |     name: cfvqasimple
25 |     fusion_mode: rubi
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |     txt_enc:
31 |       name: skipthoughts
32 |       type: BayesianUniSkip
33 |       dropout: 0.25
34 |       fixed_emb: False
35 |       dir_st: data/skip-thoughts
36 |     self_q_att: True
37 |     residual: False
38 |     q_single: False
39 |     fusion:
40 |       type: block
41 |       input_dims: [4800, 2048]
42 |       output_dim: 2048
43 |       mm_dim: 1000
44 |       chunks: 20
45 |       rank: 15
46 |       dropout_input: 0.
47 |       dropout_pre_lin: 0.
48 |     agg:
49 |       type: max
50 |     classif:
51 |       mlp:
52 |         input_dim: 2048
53 |         dimensions: [1024,1024,3000]
54 |   criterion:
55 |     import: cfvqa.models.criterions.factory
56 |     name: cfvqasimple_criterion
57 |     question_loss_weight: 1.0
58 |   metric:
59 |     import: cfvqa.models.metrics.factory
60 |     name: vqa_cfvqasimple_metrics
61 | optimizer:
62 |   import: cfvqa.optimizers.factory
63 |   name: Adam
64 |   lr: 0.0003
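65 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
66 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
67 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range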
68 |   lr_decay_rate: .25
69 | engine:
70 |   name: logger
71 |   debug: False
72 |   print_freq: 10
73 |   nb_epochs: 22
74 |   saving_criteria:
75 |   - eval_epoch.accuracy_all_top1:max
76 |   - eval_epoch.accuracy_vq_top1:max
77 |   - eval_epoch.accuracy_cfvqa_top1:max
78 | misc:
79 |   logs_name:
80 |   cuda: True
81 |   seed: 1337
82 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqa2/smrl_cfvqasimple_sum.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqa2/smrl_cfvqasimple_sum
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqa2 # or vqa2vg
 7 |   dir: data/vqa/vqa2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 |   vg: false
19 | model:
20 |   name: default
21 |   network:
22 |     import: cfvqa.models.networks.factory
23 |     base: smrl
24 |     name: cfvqasimple
25 |     fusion_mode: sum
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |     txt_enc:
31 |       name: skipthoughts
32 |       type: BayesianUniSkip
33 |       dropout: 0.25
34 |       fixed_emb: False
35 |       dir_st: data/skip-thoughts
36 |     self_q_att: True
37 |     residual: False
38 |     q_single: False
39 |     fusion:
40 |       type: block
41 |       input_dims: [4800, 2048]
42 |       output_dim: 2048
43 |       mm_dim: 1000
44 |       chunks: 20
45 |       rank: 15
46 |       dropout_input: 0.
47 |       dropout_pre_lin: 0.
48 |     agg:
49 |       type: max
50 |     classif:
51 |       mlp:
52 |         input_dim: 2048
53 |         dimensions: [1024,1024,3000]
54 |   criterion:
55 |     import: cfvqa.models.criterions.factory
56 |     name: cfvqasimple_criterion
57 |     question_loss_weight: 1.0
58 |   metric:
59 |     import: cfvqa.models.metrics.factory
60 |     name: vqa_cfvqasimple_metrics
61 | optimizer:
62 |   import: cfvqa.optimizers.factory
63 |   name: Adam
64 |   lr: 0.0003
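65 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
66 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
67 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range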
68 |   lr_decay_rate: .25
69 | engine:
70 |   name: logger
71 |   debug: False
72 |   print_freq: 10
73 |   nb_epochs: 22
74 |   saving_criteria:
75 |   - eval_epoch.accuracy_all_top1:max
76 |   - eval_epoch.accuracy_vq_top1:max
77 |   - eval_epoch.accuracy_cfvqa_top1:max
78 | misc:
79 |   logs_name:
80 |   cuda: True
81 |   seed: 1337
82 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqa2/smrl_rubi.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqa2/smrl_rubi
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqa2 # or vqa2vg
 7 |   dir: data/vqa/vqa2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 |   vg: false
19 | model:
20 |   name: default
21 |   network:
22 |     import: cfvqa.models.networks.factory
23 |     base: smrl
24 |     name: rubi
25 |     rubi_params:
26 |       mlp_q:
27 |         input_dim: 4800
28 |         dimensions: [1024,1024,3000]
29 |     txt_enc:
30 |       name: skipthoughts
31 |       type: BayesianUniSkip
32 |       dropout: 0.25
33 |       fixed_emb: False
34 |       dir_st: data/skip-thoughts
35 |     self_q_att: True
36 |     residual: False
37 |     q_single: False
38 |     fusion:
39 |       type: block
40 |       input_dims: [4800, 2048]
41 |       output_dim: 2048
42 |       mm_dim: 1000
43 |       chunks: 20
44 |       rank: 15
45 |       dropout_input: 0.
46 |       dropout_pre_lin: 0.
47 |     agg:
48 |       type: max
49 |     classif:
50 |       mlp:
51 |         input_dim: 2048
52 |         dimensions: [1024,1024,3000]
53 |   criterion:
54 |     import: cfvqa.models.criterions.factory
55 |     name: rubi_criterion
56 |     question_loss_weight: 1.0
57 |   metric:
58 |     import: cfvqa.models.metrics.factory
59 |     name: vqa_rubi_metrics
60 | optimizer:
61 |   import: cfvqa.optimizers.factory
62 |   name: Adam
63 |   lr: 0.0003
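64 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
65 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
66 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range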
67 |   lr_decay_rate: .25
68 | engine:
69 |   name: logger
70 |   debug: False
71 |   print_freq: 10
72 |   nb_epochs: 22
73 |   saving_criteria:
74 |   - eval_epoch.accuracy_top1:max
75 |   - eval_epoch.accuracy_all_top1:max
76 | misc:
77 |   logs_name:
78 |   cuda: True
79 |   seed: 1337
80 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqacp2/smrl_baseline.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqacp2/smrl_baseline
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqacp2 # VQA-CP v2
 7 |   dir: data/vqa/vqacp2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 | model:
19 |   name: default
20 |   network:
21 |     import: cfvqa.models.networks.factory
22 |     base: smrl
23 |     name: baseline
24 |     txt_enc:
25 |       name: skipthoughts
26 |       type: BayesianUniSkip
27 |       dropout: 0.25
28 |       fixed_emb: False
29 |       dir_st: data/skip-thoughts
30 |     self_q_att: True
31 |     residual: False
32 |     q_single: False
33 |     fusion:
34 |       type: block
35 |       input_dims: [4800, 2048]
36 |       output_dim: 2048
37 |       mm_dim: 1000
38 |       chunks: 20
39 |       rank: 15
40 |       dropout_input: 0.
41 |       dropout_pre_lin: 0.
42 |     agg:
43 |       type: max
44 |     classif:
45 |       mlp:
46 |         input_dim: 2048
47 |         dimensions: [1024,1024,3000]
48 |   criterion:
49 |     import: cfvqa.models.criterions.factory
50 |     name: vqa_cross_entropy
51 |   metric:
52 |     import: cfvqa.models.metrics.factory
53 |     name: vqa_accuracies
54 | optimizer:
55 |   import: cfvqa.optimizers.factory
56 |   name: Adam
57 |   lr: 0.0003
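58 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
59 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
60 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range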
61 |   lr_decay_rate: .25
62 | engine:
63 |   name: logger
64 |   debug: False
65 |   print_freq: 10
66 |   nb_epochs: 22
67 |   saving_criteria:
68 |   - eval_epoch.accuracy_top1:max
69 | misc:
70 |   logs_name:
71 |   cuda: True
72 |   seed: 1337
73 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqacp2/smrl_cfvqa_hm.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqacp2/smrl_cfvqa_hm
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqacp2 # VQA-CP v2
 7 |   dir: data/vqa/vqacp2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 | model:
19 |   name: default
20 |   network:
21 |     import: cfvqa.models.networks.factory
22 |     base: smrl
23 |     name: cfvqa
24 |     fusion_mode: hm
25 |     is_vq: True
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |       mlp_v:
31 |         input_dim: 2048
32 |         dimensions: [1024,1024,3000]
33 |     txt_enc:
34 |       name: skipthoughts
35 |       type: BayesianUniSkip
36 |       dropout: 0.25
37 |       fixed_emb: False
38 |       dir_st: data/skip-thoughts
39 |     self_q_att: True
40 |     residual: False
41 |     q_single: False
42 |     fusion:
43 |       type: block
44 |       input_dims: [4800, 2048]
45 |       output_dim: 2048
46 |       mm_dim: 1000
47 |       chunks: 20
48 |       rank: 15
49 |       dropout_input: 0.
50 |       dropout_pre_lin: 0.
51 |     agg:
52 |       type: max
53 |     classif:
54 |       mlp:
55 |         input_dim: 2048
56 |         dimensions: [1024,1024,3000]
57 |   criterion:
58 |     import: cfvqa.models.criterions.factory
59 |     name: cfvqa_criterion
60 |     question_loss_weight: 1.0
61 |     vision_loss_weight: 1.0
62 |   metric:
63 |     import: cfvqa.models.metrics.factory
64 |     name: vqa_cfvqa_metrics
65 | optimizer:
66 |   import: cfvqa.optimizers.factory
67 |   name: Adam
68 |   lr: 0.0003
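69 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
70 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
71 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range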
72 |   lr_decay_rate: .25
73 | engine:
74 |   name: logger
75 |   debug: False
76 |   print_freq: 10
77 |   nb_epochs: 22
78 |   saving_criteria:
79 |   - eval_epoch.accuracy_all_top1:max
80 |   - eval_epoch.accuracy_vq_top1:max
81 |   - eval_epoch.accuracy_cfvqa_top1:max
82 | misc:
83 |   logs_name:
84 |   cuda: True
85 |   seed: 1337
86 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqacp2/smrl_cfvqa_sum.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqacp2/smrl_cfvqa_sum
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqacp2 # VQA-CP v2
 7 |   dir: data/vqa/vqacp2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 | model:
19 |   name: default
20 |   network:
21 |     import: cfvqa.models.networks.factory
22 |     base: smrl
23 |     name: cfvqa
24 |     fusion_mode: sum
25 |     is_vq: True
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |       mlp_v:
31 |         input_dim: 2048
32 |         dimensions: [1024,1024,3000]
33 |     txt_enc:
34 |       name: skipthoughts
35 |       type: BayesianUniSkip
36 |       dropout: 0.25
37 |       fixed_emb: False
38 |       dir_st: data/skip-thoughts
39 |     self_q_att: True
40 |     residual: False
41 |     q_single: False
42 |     fusion:
43 |       type: block
44 |       input_dims: [4800, 2048]
45 |       output_dim: 2048
46 |       mm_dim: 1000
47 |       chunks: 20
48 |       rank: 15
49 |       dropout_input: 0.
50 |       dropout_pre_lin: 0.
51 |     agg:
52 |       type: max
53 |     classif:
54 |       mlp:
55 |         input_dim: 2048
56 |         dimensions: [1024,1024,3000]
57 |   criterion:
58 |     import: cfvqa.models.criterions.factory
59 |     name: cfvqa_criterion
60 |     question_loss_weight: 1.0
61 |     vision_loss_weight: 1.0
62 |   metric:
63 |     import: cfvqa.models.metrics.factory
64 |     name: vqa_cfvqa_metrics
65 | optimizer:
66 |   import: cfvqa.optimizers.factory
67 |   name: Adam
68 |   lr: 0.0003
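69 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
70 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
71 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range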
72 |   lr_decay_rate: .25
73 | engine:
74 |   name: logger
75 |   debug: False
76 |   print_freq: 10
77 |   nb_epochs: 22
78 |   saving_criteria:
79 |   - eval_epoch.accuracy_all_top1:max
80 |   - eval_epoch.accuracy_vq_top1:max
81 |   - eval_epoch.accuracy_cfvqa_top1:max
82 | misc:
83 |   logs_name:
84 |   cuda: True
85 |   seed: 1337
86 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqacp2/smrl_cfvqasimple_hm.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqacp2/smrl_cfvqasimple_hm
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqacp2 # VQA-CP v2
 7 |   dir: data/vqa/vqacp2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 | model:
19 |   name: default
20 |   network:
21 |     import: cfvqa.models.networks.factory
22 |     base: smrl
23 |     name: cfvqasimple
24 |     fusion_mode: hm
25 |     is_vq: False
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |     txt_enc:
31 |       name: skipthoughts
32 |       type: BayesianUniSkip
33 |       dropout: 0.25
34 |       fixed_emb: False
35 |       dir_st: data/skip-thoughts
36 |     self_q_att: True
37 |     residual: False
38 |     q_single: False
39 |     fusion:
40 |       type: block
41 |       input_dims: [4800, 2048]
42 |       output_dim: 2048
43 |       mm_dim: 1000
44 |       chunks: 20
45 |       rank: 15
46 |       dropout_input: 0.
47 |       dropout_pre_lin: 0.
48 |     agg:
49 |       type: max
50 |     classif:
51 |       mlp:
52 |         input_dim: 2048
53 |         dimensions: [1024,1024,3000]
54 |   criterion:
55 |     import: cfvqa.models.criterions.factory
56 |     name: cfvqasimple_criterion
57 |     question_loss_weight: 1.0
58 |   metric:
59 |     import: cfvqa.models.metrics.factory
60 |     name: vqa_cfvqasimple_metrics
61 | optimizer:
62 |   import: cfvqa.optimizers.factory
63 |   name: Adam
64 |   lr: 0.0003
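65 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
66 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
67 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range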
68 |   lr_decay_rate: .25
69 | engine:
70 |   name: logger
71 |   debug: False
72 |   print_freq: 10
73 |   nb_epochs: 22
74 |   saving_criteria:
75 |   - eval_epoch.accuracy_all_top1:max
76 |   - eval_epoch.accuracy_vq_top1:max
77 |   - eval_epoch.accuracy_cfvqa_top1:max
78 | misc:
79 |   logs_name:
80 |   cuda: True
81 |   seed: 1337
82 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqacp2/smrl_cfvqasimple_rubi.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqacp2/smrl_cfvqasimple_rubi
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqacp2 # VQA-CP v2
 7 |   dir: data/vqa/vqacp2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 | model:
19 |   name: default
20 |   network:
21 |     import: cfvqa.models.networks.factory
22 |     base: smrl
23 |     name: cfvqasimple
24 |     fusion_mode: rubi
25 |     is_vq: False
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |     txt_enc:
31 |       name: skipthoughts
32 |       type: BayesianUniSkip
33 |       dropout: 0.25
34 |       fixed_emb: False
35 |       dir_st: data/skip-thoughts
36 |     self_q_att: True
37 |     residual: False
38 |     q_single: False
39 |     fusion:
40 |       type: block
41 |       input_dims: [4800, 2048]
42 |       output_dim: 2048
43 |       mm_dim: 1000
44 |       chunks: 20
45 |       rank: 15
46 |       dropout_input: 0.
47 |       dropout_pre_lin: 0.
48 |     agg:
49 |       type: max
50 |     classif:
51 |       mlp:
52 |         input_dim: 2048
53 |         dimensions: [1024,1024,3000]
54 |   criterion:
55 |     import: cfvqa.models.criterions.factory
56 |     name: cfvqasimple_criterion
57 |     question_loss_weight: 1.0
58 |   metric:
59 |     import: cfvqa.models.metrics.factory
60 |     name: vqa_cfvqasimple_metrics
61 | optimizer:
62 |   import: cfvqa.optimizers.factory
63 |   name: Adam
64 |   lr: 0.0003
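65 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
66 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
67 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range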
68 |   lr_decay_rate: .25
69 | engine:
70 |   name: logger
71 |   debug: False
72 |   print_freq: 10
73 |   nb_epochs: 22
74 |   saving_criteria:
75 |   - eval_epoch.accuracy_all_top1:max
76 |   - eval_epoch.accuracy_vq_top1:max
77 |   - eval_epoch.accuracy_cfvqa_top1:max
78 | misc:
79 |   logs_name:
80 |   cuda: True
81 |   seed: 1337
82 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqacp2/smrl_cfvqasimple_sum.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqacp2/smrl_cfvqasimple_sum
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqacp2 # VQA-CP v2
 7 |   dir: data/vqa/vqacp2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 | model:
19 |   name: default
20 |   network:
21 |     import: cfvqa.models.networks.factory
22 |     base: smrl
23 |     name: cfvqasimple
24 |     fusion_mode: sum
25 |     is_vq: False
26 |     cfvqa_params:
27 |       mlp_q:
28 |         input_dim: 4800
29 |         dimensions: [1024,1024,3000]
30 |     txt_enc:
31 |       name: skipthoughts
32 |       type: BayesianUniSkip
33 |       dropout: 0.25
34 |       fixed_emb: False
35 |       dir_st: data/skip-thoughts
36 |     self_q_att: True
37 |     residual: False
38 |     q_single: False
39 |     fusion:
40 |       type: block
41 |       input_dims: [4800, 2048]
42 |       output_dim: 2048
43 |       mm_dim: 1000
44 |       chunks: 20
45 |       rank: 15
46 |       dropout_input: 0.
47 |       dropout_pre_lin: 0.
48 |     agg:
49 |       type: max
50 |     classif:
51 |       mlp:
52 |         input_dim: 2048
53 |         dimensions: [1024,1024,3000]
54 |   criterion:
55 |     import: cfvqa.models.criterions.factory
56 |     name: cfvqasimple_criterion
57 |     question_loss_weight: 1.0
58 |   metric:
59 |     import: cfvqa.models.metrics.factory
60 |     name: vqa_cfvqasimple_metrics
61 | optimizer:
62 |   import: cfvqa.optimizers.factory
63 |   name: Adam
64 |   lr: 0.0003
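65 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
66 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
67 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range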
68 |   lr_decay_rate: .25
69 | engine:
70 |   name: logger
71 |   debug: False
72 |   print_freq: 10
73 |   nb_epochs: 22
74 |   saving_criteria:
75 |   - eval_epoch.accuracy_all_top1:max
76 |   - eval_epoch.accuracy_vq_top1:max
77 |   - eval_epoch.accuracy_cfvqa_top1:max
78 | misc:
79 |   logs_name:
80 |   cuda: True
81 |   seed: 1337
82 | 


--------------------------------------------------------------------------------
/cfvqa/options/vqacp2/smrl_rubi.yaml:
--------------------------------------------------------------------------------
 1 | exp:
 2 |   dir: logs/vqacp2/smrl_rubi
 3 |   resume: # last, best_[...], or empty (from scratch)
 4 | dataset:
 5 |   import: cfvqa.datasets.factory
 6 |   name: vqacp2 # VQA-CP v2
 7 |   dir: data/vqa/vqacp2
 8 |   train_split: train
 9 |   eval_split: val # or test
10 |   proc_split: train # or trainval (preprocessing split, must be equal to train_split)
11 |   nb_threads: 4
12 |   batch_size: 256
13 |   nans: 3000
14 |   minwcount: 0
15 |   nlp: mcb
16 |   samplingans: True
17 |   dir_rcnn: data/vqa/coco/extract_rcnn/2018-04-27_bottom-up-attention_fixed_36
18 | model:
19 |   name: default
20 |   network:
21 |     import: cfvqa.models.networks.factory
22 |     base: smrl
23 |     name: rubi
24 |     rubi_params:
25 |       mlp_q:
26 |         input_dim: 4800
27 |         dimensions: [1024,1024,3000]
28 |     txt_enc:
29 |       name: skipthoughts
30 |       type: BayesianUniSkip
31 |       dropout: 0.25
32 |       fixed_emb: False
33 |       dir_st: data/skip-thoughts
34 |     self_q_att: True
35 |     residual: False
36 |     q_single: False
37 |     fusion:
38 |       type: block
39 |       input_dims: [4800, 2048]
40 |       output_dim: 2048
41 |       mm_dim: 1000
42 |       chunks: 20
43 |       rank: 15
44 |       dropout_input: 0.
45 |       dropout_pre_lin: 0.
46 |     agg:
47 |       type: max
48 |     classif:
49 |       mlp:
50 |         input_dim: 2048
51 |         dimensions: [1024,1024,3000]
52 |   criterion:
53 |     import: cfvqa.models.criterions.factory
54 |     name: rubi_criterion
55 |     question_loss_weight: 1.0
56 |   metric:
57 |     import: cfvqa.models.metrics.factory
58 |     name: vqa_rubi_metrics
59 | optimizer:
60 |   import: cfvqa.optimizers.factory
61 |   name: Adam
62 |   lr: 0.0003
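63 |   gradual_warmup_steps: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
64 |   gradual_warmup_steps_mm: [0.5, 2.0, 7.0] # (start, end, steps) arguments for torch.linspace
65 |   lr_decay_epochs: [14, 24, 2] # (start, stop, step) arguments for range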
66 |   lr_decay_rate: .25
67 | engine:
68 |   name: logger
69 |   debug: False
70 |   print_freq: 10
71 |   nb_epochs: 22
72 |   saving_criteria:
73 |   - eval_epoch.accuracy_top1:max
74 |   - eval_epoch.accuracy_all_top1:max
75 | misc:
76 |   logs_name:
77 |   cuda: True
78 |   seed: 1337
79 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | block.bootstrap.pytorch
2 | h5py
3 | plotly==3.10.0


--------------------------------------------------------------------------------