├── .gitignore ├── LICENSE ├── README.md ├── baselines ├── README.md ├── clip │ ├── contextual.py │ ├── contra.py │ ├── dataset.py │ ├── evaluate_clip.py │ ├── evaluate_contextual.py │ ├── extras.py │ ├── nocontra.py │ ├── vilbert-and-bert-config.json │ ├── volta_src │ │ ├── config.py │ │ ├── embeddings.py │ │ ├── encoders.py │ │ ├── losses.py │ │ └── utils.py │ └── zero_shot.py └── crossencoders │ ├── analyze_results.py │ ├── contextual.py │ ├── contra.py │ ├── ctrl_uniter_base.json │ ├── nocontra.py │ ├── task_config │ ├── contextual.yml │ ├── contra.yml │ ├── nocontra.yml │ └── zero_shot.yml │ ├── vilbert_base.json │ ├── volta │ ├── OLD_encoders.py │ ├── __init__.py │ ├── config.py │ ├── datasets │ │ ├── __init__.py │ │ ├── _image_features_reader.py │ │ ├── concept_cap_dataset.py │ │ ├── flickr_grounding_dataset.py │ │ ├── gqa_dataset.py │ │ ├── guesswhat_dataset.py │ │ ├── guesswhat_pointing_dataset.py │ │ ├── nlvr2_dataset.py │ │ ├── refer_dense_caption.py │ │ ├── refer_expression_dataset.py │ │ ├── retrieval_dataset.py │ │ ├── vcr_dataset.py │ │ ├── visdial_dataset.py │ │ ├── vismadlibs_dataset.py │ │ ├── visual7w_pointing_dataset.py │ │ ├── visual_entailment_dataset.py │ │ ├── visual_genome_dataset.py │ │ └── vqa_dataset.py │ ├── embeddings.py │ ├── encoders.py │ ├── extras.py │ ├── losses.py │ ├── optimization.py │ ├── task_utils.py │ ├── train_utils.py │ └── utils.py │ └── zero_shot.py ├── data ├── analysis │ ├── annotator_agreement.py │ ├── annotator_bias.py │ ├── annotator_split_valid.json │ ├── calc_accuracies.py │ ├── compare_dataset_statistics.py │ ├── convert.py │ ├── convert_zeroshot.py │ ├── counter2key_test.json │ ├── counter2key_train.json │ ├── counter2key_val.json │ ├── img_similarity.py │ ├── manual_annotation_valid.yaml │ └── shortid2id.json ├── test_data_unlabeled.json ├── train_data.json ├── train_simple.json ├── valid_data.json ├── valid_simple.json └── vilbert_data_format │ ├── test.json │ ├── test_target.pkl │ ├── train.json │ ├── train_target.pkl │ ├── trainval_ans2label.pkl │ ├── trainval_label2ans.pkl │ ├── val.json │ └── val_target.pkl ├── example.png └── install.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/README.md -------------------------------------------------------------------------------- /baselines/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/README.md -------------------------------------------------------------------------------- /baselines/clip/contextual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/contextual.py -------------------------------------------------------------------------------- /baselines/clip/contra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/contra.py -------------------------------------------------------------------------------- /baselines/clip/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/dataset.py -------------------------------------------------------------------------------- /baselines/clip/evaluate_clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/evaluate_clip.py -------------------------------------------------------------------------------- /baselines/clip/evaluate_contextual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/evaluate_contextual.py -------------------------------------------------------------------------------- /baselines/clip/extras.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/extras.py -------------------------------------------------------------------------------- /baselines/clip/nocontra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/nocontra.py -------------------------------------------------------------------------------- /baselines/clip/vilbert-and-bert-config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/vilbert-and-bert-config.json -------------------------------------------------------------------------------- /baselines/clip/volta_src/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/volta_src/config.py -------------------------------------------------------------------------------- /baselines/clip/volta_src/embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/volta_src/embeddings.py -------------------------------------------------------------------------------- /baselines/clip/volta_src/encoders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/volta_src/encoders.py -------------------------------------------------------------------------------- /baselines/clip/volta_src/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/volta_src/losses.py -------------------------------------------------------------------------------- /baselines/clip/volta_src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/volta_src/utils.py -------------------------------------------------------------------------------- /baselines/clip/zero_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/clip/zero_shot.py -------------------------------------------------------------------------------- /baselines/crossencoders/analyze_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/analyze_results.py -------------------------------------------------------------------------------- /baselines/crossencoders/contextual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/contextual.py -------------------------------------------------------------------------------- /baselines/crossencoders/contra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/contra.py -------------------------------------------------------------------------------- /baselines/crossencoders/ctrl_uniter_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/ctrl_uniter_base.json -------------------------------------------------------------------------------- /baselines/crossencoders/nocontra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/nocontra.py -------------------------------------------------------------------------------- /baselines/crossencoders/task_config/contextual.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/task_config/contextual.yml -------------------------------------------------------------------------------- /baselines/crossencoders/task_config/contra.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/task_config/contra.yml -------------------------------------------------------------------------------- /baselines/crossencoders/task_config/nocontra.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/task_config/nocontra.yml -------------------------------------------------------------------------------- /baselines/crossencoders/task_config/zero_shot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/task_config/zero_shot.yml -------------------------------------------------------------------------------- /baselines/crossencoders/vilbert_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/vilbert_base.json -------------------------------------------------------------------------------- /baselines/crossencoders/volta/OLD_encoders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/OLD_encoders.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/__init__.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/config.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/__init__.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/_image_features_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/_image_features_reader.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/concept_cap_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/concept_cap_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/flickr_grounding_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/flickr_grounding_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/gqa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/gqa_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/guesswhat_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/guesswhat_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/guesswhat_pointing_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/guesswhat_pointing_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/nlvr2_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/nlvr2_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/refer_dense_caption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/refer_dense_caption.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/refer_expression_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/refer_expression_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/retrieval_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/retrieval_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/vcr_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/vcr_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/visdial_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/visdial_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/vismadlibs_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/vismadlibs_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/visual7w_pointing_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/visual7w_pointing_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/visual_entailment_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/visual_entailment_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/visual_genome_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/visual_genome_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/datasets/vqa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/datasets/vqa_dataset.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/embeddings.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/encoders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/encoders.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/extras.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/extras.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/losses.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/optimization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/optimization.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/task_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/task_utils.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/train_utils.py -------------------------------------------------------------------------------- /baselines/crossencoders/volta/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/volta/utils.py -------------------------------------------------------------------------------- /baselines/crossencoders/zero_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/baselines/crossencoders/zero_shot.py -------------------------------------------------------------------------------- /data/analysis/annotator_agreement.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/annotator_agreement.py -------------------------------------------------------------------------------- /data/analysis/annotator_bias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/annotator_bias.py -------------------------------------------------------------------------------- /data/analysis/annotator_split_valid.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/annotator_split_valid.json -------------------------------------------------------------------------------- /data/analysis/calc_accuracies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/calc_accuracies.py -------------------------------------------------------------------------------- /data/analysis/compare_dataset_statistics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/compare_dataset_statistics.py -------------------------------------------------------------------------------- /data/analysis/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/convert.py -------------------------------------------------------------------------------- /data/analysis/convert_zeroshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/convert_zeroshot.py -------------------------------------------------------------------------------- /data/analysis/counter2key_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/counter2key_test.json -------------------------------------------------------------------------------- /data/analysis/counter2key_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/counter2key_train.json -------------------------------------------------------------------------------- /data/analysis/counter2key_val.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/counter2key_val.json -------------------------------------------------------------------------------- /data/analysis/img_similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/img_similarity.py -------------------------------------------------------------------------------- /data/analysis/manual_annotation_valid.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/manual_annotation_valid.yaml -------------------------------------------------------------------------------- /data/analysis/shortid2id.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/analysis/shortid2id.json -------------------------------------------------------------------------------- /data/test_data_unlabeled.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/test_data_unlabeled.json -------------------------------------------------------------------------------- /data/train_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/train_data.json -------------------------------------------------------------------------------- /data/train_simple.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/train_simple.json -------------------------------------------------------------------------------- /data/valid_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/valid_data.json -------------------------------------------------------------------------------- /data/valid_simple.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/valid_simple.json -------------------------------------------------------------------------------- /data/vilbert_data_format/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/vilbert_data_format/test.json -------------------------------------------------------------------------------- /data/vilbert_data_format/test_target.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/vilbert_data_format/test_target.pkl -------------------------------------------------------------------------------- /data/vilbert_data_format/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/vilbert_data_format/train.json -------------------------------------------------------------------------------- /data/vilbert_data_format/train_target.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/vilbert_data_format/train_target.pkl -------------------------------------------------------------------------------- /data/vilbert_data_format/trainval_ans2label.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/vilbert_data_format/trainval_ans2label.pkl -------------------------------------------------------------------------------- /data/vilbert_data_format/trainval_label2ans.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/vilbert_data_format/trainval_label2ans.pkl -------------------------------------------------------------------------------- /data/vilbert_data_format/val.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/vilbert_data_format/val.json -------------------------------------------------------------------------------- /data/vilbert_data_format/val_target.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/data/vilbert_data_format/val_target.pkl -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/example.png -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/McGill-NLP/imagecode/HEAD/install.sh --------------------------------------------------------------------------------