├── .circleci
└── config.yml
├── .editorconfig
├── .flake8
├── .github
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── ISSUE_TEMPLATE
│ ├── bug-report.md
│ ├── config.yml
│ ├── feature-request.md
│ ├── questions-help-support.md
│ └── unexpected-problems.md
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ ├── cpu_test.yaml
│ ├── deploy_website.yaml
│ └── linter_test.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── MANIFEST.in
├── NOTICES
├── PACKAGE
├── README.md
├── docs
├── .gitignore
├── Makefile
├── license_header.txt
├── requirements.txt
└── source
│ ├── _static
│ ├── css
│ │ └── customize.css
│ ├── images
│ │ ├── chevron-right-orange.svg
│ │ ├── chevron_blue.svg
│ │ ├── favicon.png
│ │ ├── logo_icon.svg
│ │ ├── mmf_logo.png
│ │ ├── mmf_logo.svg
│ │ ├── mmf_logo_white_f.svg
│ │ ├── search_icon.svg
│ │ └── view-page-source-icon.svg
│ └── js
│ │ ├── ga.js
│ │ └── redirect.js
│ ├── _templates
│ ├── layout.html
│ └── theme_variables.jinja
│ ├── conf.py
│ ├── index.rst
│ └── lib
│ ├── common
│ ├── registry.rst
│ └── sample.rst
│ ├── datasets
│ ├── base_dataset.rst
│ ├── base_dataset_builder.rst
│ └── processors.rst
│ ├── models
│ └── base_model.rst
│ ├── modules
│ ├── losses.rst
│ └── metrics.rst
│ └── utils
│ └── text.rst
├── mmf
├── __init__.py
├── common
│ ├── __init__.py
│ ├── batch_collator.py
│ ├── constants.py
│ ├── dataset_loader.py
│ ├── meter.py
│ ├── registry.py
│ ├── report.py
│ ├── sample.py
│ ├── test_reporter.py
│ └── typings.py
├── configs
│ ├── datasets
│ │ ├── airstore
│ │ │ └── defaults.yaml
│ │ ├── charades
│ │ │ └── defaults.yaml
│ │ ├── clevr
│ │ │ └── defaults.yaml
│ │ ├── coco
│ │ │ ├── defaults.yaml
│ │ │ ├── detection.yaml
│ │ │ ├── masked.yaml
│ │ │ └── ocr_en.yaml
│ │ ├── coco2017
│ │ │ └── masked.yaml
│ │ ├── conceptual_captions
│ │ │ ├── defaults.yaml
│ │ │ ├── masked.yaml
│ │ │ └── train_small.yaml
│ │ ├── flickr30k
│ │ │ └── masked.yaml
│ │ ├── glue
│ │ │ └── defaults.yaml
│ │ ├── gqa
│ │ │ ├── defaults.yaml
│ │ │ └── masked.yaml
│ │ ├── hateful_memes
│ │ │ ├── bert.yaml
│ │ │ ├── defaults.yaml
│ │ │ ├── fine_grained
│ │ │ │ ├── attack_vectors.yaml
│ │ │ │ ├── defaults.yaml
│ │ │ │ ├── hateful_pc_attack.yaml
│ │ │ │ ├── pc_attack.yaml
│ │ │ │ ├── protected_groups.yaml
│ │ │ │ └── with_features.yaml
│ │ │ └── with_features.yaml
│ │ ├── localized_narratives
│ │ │ └── masked.yaml
│ │ ├── mmimdb
│ │ │ ├── defaults.yaml
│ │ │ ├── masked.yaml
│ │ │ └── with_features.yaml
│ │ ├── nlvr2
│ │ │ └── defaults.yaml
│ │ ├── ocrvqa
│ │ │ └── defaults.yaml
│ │ ├── okvqa
│ │ │ └── defaults.yaml
│ │ ├── retrieval
│ │ │ └── flickr30k_defaults.yaml
│ │ ├── sbu_captions
│ │ │ └── masked.yaml
│ │ ├── stvqa
│ │ │ └── defaults.yaml
│ │ ├── textcaps
│ │ │ └── defaults.yaml
│ │ ├── textvqa
│ │ │ ├── defaults.yaml
│ │ │ └── with_resnet.yaml
│ │ ├── vinvl
│ │ │ └── defaults.yaml
│ │ ├── visual_dialog
│ │ │ └── defaults.yaml
│ │ ├── visual_entailment
│ │ │ └── defaults.yaml
│ │ ├── visual_genome
│ │ │ ├── defaults.yaml
│ │ │ ├── detection.yaml
│ │ │ └── masked.yaml
│ │ ├── vizwiz
│ │ │ └── defaults.yaml
│ │ ├── vqa2
│ │ │ ├── defaults.yaml
│ │ │ ├── masked.yaml
│ │ │ ├── masked_q.yaml
│ │ │ ├── train_val.yaml
│ │ │ └── with_raw_images.yaml
│ │ └── vqacp_v2
│ │ │ └── defaults.yaml
│ ├── defaults.yaml
│ ├── models
│ │ ├── alignment
│ │ │ └── defaults.yaml
│ │ ├── ban
│ │ │ └── defaults.yaml
│ │ ├── butd
│ │ │ └── defaults.yaml
│ │ ├── cnn_lstm
│ │ │ └── defaults.yaml
│ │ ├── fusions
│ │ │ ├── concat_bert.yaml
│ │ │ ├── concat_bow.yaml
│ │ │ ├── defaults.yaml
│ │ │ └── late_fusion.yaml
│ │ ├── krisp
│ │ │ └── defaults.yaml
│ │ ├── lorra
│ │ │ └── defaults.yaml
│ │ ├── lxmert
│ │ │ ├── defaults.yaml
│ │ │ └── pretrain.yaml
│ │ ├── m4c
│ │ │ └── defaults.yaml
│ │ ├── m4c_captioner
│ │ │ └── defaults.yaml
│ │ ├── mmbt
│ │ │ ├── classification.yaml
│ │ │ ├── defaults.yaml
│ │ │ ├── pretrain.yaml
│ │ │ └── with_features.yaml
│ │ ├── mmf_bert
│ │ │ └── defaults.yaml
│ │ ├── mmf_transformer
│ │ │ ├── defaults.yaml
│ │ │ ├── pretrain.yaml
│ │ │ └── with_audio_video.yaml
│ │ ├── movie_mcan
│ │ │ └── defaults.yaml
│ │ ├── pythia
│ │ │ └── defaults.yaml
│ │ ├── unimodal
│ │ │ ├── bert.yaml
│ │ │ ├── image.yaml
│ │ │ ├── text.yaml
│ │ │ └── with_features.yaml
│ │ ├── unit
│ │ │ └── defaults.yaml
│ │ ├── uniter
│ │ │ └── defaults.yaml
│ │ ├── vilbert
│ │ │ ├── defaults.yaml
│ │ │ └── pretrain.yaml
│ │ ├── vilt
│ │ │ └── defaults.yaml
│ │ ├── vinvl
│ │ │ └── defaults.yaml
│ │ └── visual_bert
│ │ │ ├── classification.yaml
│ │ │ ├── defaults.yaml
│ │ │ └── pretrain.yaml
│ └── zoo
│ │ ├── datasets.yaml
│ │ └── models.yaml
├── datasets
│ ├── __init__.py
│ ├── base_dataset.py
│ ├── base_dataset_builder.py
│ ├── builders
│ │ ├── __init__.py
│ │ ├── airstore
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── charades
│ │ │ ├── __init__.py
│ │ │ ├── _utils.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── clevr
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── coco
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── dataset.py
│ │ │ ├── detection_builder.py
│ │ │ ├── detection_dataset.py
│ │ │ ├── masked_builder.py
│ │ │ └── masked_dataset.py
│ │ ├── coco2017
│ │ │ ├── __init__.py
│ │ │ ├── masked_builder.py
│ │ │ └── masked_dataset.py
│ │ ├── conceptual_captions
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── dataset.py
│ │ │ ├── masked_builder.py
│ │ │ └── masked_dataset.py
│ │ ├── flickr30k
│ │ │ ├── __init__.py
│ │ │ ├── masked_builder.py
│ │ │ └── masked_dataset.py
│ │ ├── glue
│ │ │ ├── __init__.py
│ │ │ └── builder.py
│ │ ├── gqa
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── dataset.py
│ │ │ ├── masked_builder.py
│ │ │ └── masked_dataset.py
│ │ ├── hateful_memes
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── localized_narratives
│ │ │ ├── __init__.py
│ │ │ ├── database.py
│ │ │ ├── masked_builder.py
│ │ │ └── masked_dataset.py
│ │ ├── mmimdb
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── dataset.py
│ │ │ ├── masked_builder.py
│ │ │ └── masked_dataset.py
│ │ ├── nlvr2
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── ocrvqa
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── okvqa
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── database.py
│ │ │ └── dataset.py
│ │ ├── retrieval
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── dataset.py
│ │ │ └── datasets.py
│ │ ├── sbu_captions
│ │ │ ├── __init__.py
│ │ │ ├── masked_builder.py
│ │ │ └── masked_dataset.py
│ │ ├── stvqa
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── textcaps
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── textvqa
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── vinvl
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── visual_dialog
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── database.py
│ │ │ └── dataset.py
│ │ ├── visual_entailment
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── visual_genome
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── dataset.py
│ │ │ ├── detection_builder.py
│ │ │ ├── detection_dataset.py
│ │ │ ├── masked_builder.py
│ │ │ └── masked_dataset.py
│ │ ├── vizwiz
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ └── dataset.py
│ │ ├── vqa2
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── dataset.py
│ │ │ ├── masked_builder.py
│ │ │ ├── masked_dataset.py
│ │ │ ├── masked_q_vqa2_builder.py
│ │ │ ├── masked_q_vqa2_dataset.py
│ │ │ ├── ocr_builder.py
│ │ │ └── ocr_dataset.py
│ │ └── vqacp_v2
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── database.py
│ │ │ └── dataset.py
│ ├── concat_dataset.py
│ ├── databases
│ │ ├── __init__.py
│ │ ├── annotation_database.py
│ │ ├── features_database.py
│ │ ├── image_database.py
│ │ ├── readers
│ │ │ ├── __init__.py
│ │ │ └── feature_readers.py
│ │ └── scene_graph_database.py
│ ├── iteration_strategies.py
│ ├── lightning_multi_datamodule.py
│ ├── lightning_multi_dataset_loader.py
│ ├── mmf_dataset.py
│ ├── mmf_dataset_builder.py
│ ├── multi_datamodule.py
│ ├── multi_dataset_loader.py
│ ├── processors
│ │ ├── __init__.py
│ │ ├── bert_processors.py
│ │ ├── detection_transforms.py
│ │ ├── frcnn_processor.py
│ │ ├── functional.py
│ │ ├── image_processors.py
│ │ ├── prediction_processors.py
│ │ ├── processors.py
│ │ └── video_processors.py
│ └── subset_dataset.py
├── models
│ ├── __init__.py
│ ├── albef
│ │ ├── __init__.py
│ │ └── vit.py
│ ├── alignment.py
│ ├── ban.py
│ ├── base_model.py
│ ├── butd.py
│ ├── cnn_lstm.py
│ ├── frcnn.py
│ ├── fusions.py
│ ├── interfaces
│ │ ├── __init__.py
│ │ └── mmbt.py
│ ├── krisp.py
│ ├── lorra.py
│ ├── lxmert.py
│ ├── m4c.py
│ ├── m4c_captioner.py
│ ├── mmbt.py
│ ├── mmf_bert.py
│ ├── mmf_transformer.py
│ ├── movie_mcan.py
│ ├── pythia.py
│ ├── top_down_bottom_up.py
│ ├── transformers
│ │ ├── __init__.py
│ │ ├── backends
│ │ │ ├── __init__.py
│ │ │ └── huggingface.py
│ │ ├── base.py
│ │ └── heads
│ │ │ ├── __init__.py
│ │ │ ├── contrastive.py
│ │ │ ├── itm.py
│ │ │ ├── mlm.py
│ │ │ ├── mlp.py
│ │ │ ├── mrc.py
│ │ │ ├── mrfr.py
│ │ │ ├── refiner.py
│ │ │ ├── refnet_classifier.py
│ │ │ ├── utils.py
│ │ │ └── wra.py
│ ├── unimodal.py
│ ├── unit
│ │ ├── __init__.py
│ │ ├── backbone.py
│ │ ├── matcher.py
│ │ ├── misc.py
│ │ ├── transformer.py
│ │ ├── unit.py
│ │ └── unit_base_model.py
│ ├── uniter.py
│ ├── vilbert.py
│ ├── vilt.py
│ ├── vinvl.py
│ ├── visdial_multi_modal.py
│ └── visual_bert.py
├── modules
│ ├── __init__.py
│ ├── attention.py
│ ├── bottleneck.py
│ ├── decoders.py
│ ├── embeddings.py
│ ├── encoders.py
│ ├── fusions.py
│ ├── hf_layers.py
│ ├── layers.py
│ ├── losses.py
│ ├── metrics.py
│ ├── optimizers.py
│ ├── ot.py
│ ├── poolers.py
│ ├── schedulers.py
│ └── vit.py
├── projects
├── trainers
│ ├── __init__.py
│ ├── base_trainer.py
│ ├── callbacks
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── checkpoint.py
│ │ ├── early_stopping.py
│ │ ├── logistics.py
│ │ └── lr_scheduler.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── callback_hook.py
│ │ ├── device.py
│ │ ├── evaluation_loop.py
│ │ ├── profiling.py
│ │ └── training_loop.py
│ ├── lightning_core
│ │ ├── __init__.py
│ │ ├── loop_callback.py
│ │ ├── loop_callback_with_torchmetrics.py
│ │ └── torchmetric.py
│ ├── lightning_trainer.py
│ └── mmf_trainer.py
├── utils
│ ├── __init__.py
│ ├── box_ops.py
│ ├── build.py
│ ├── checkpoint.py
│ ├── checkpoint_updater.py
│ ├── configuration.py
│ ├── dataset.py
│ ├── distributed.py
│ ├── download.py
│ ├── early_stopping.py
│ ├── env.py
│ ├── features
│ │ ├── __init__.py
│ │ └── visualizing_image.py
│ ├── file_io.py
│ ├── flags.py
│ ├── general.py
│ ├── inference.py
│ ├── logger.py
│ ├── m4c_evaluators.py
│ ├── modeling.py
│ ├── patch.py
│ ├── phoc
│ │ ├── __init__.py
│ │ ├── build_phoc.py
│ │ └── src
│ │ │ └── cphoc.c
│ ├── process_answers.py
│ ├── text.py
│ ├── timer.py
│ ├── torchscript.py
│ ├── transform.py
│ ├── visualize.py
│ ├── vocab.py
│ └── xla.py
└── version.py
├── mmf_cli
├── __init__.py
├── hm_convert.py
├── interactive.py
├── predict.py
├── run.py
└── torchx_entryscript.py
├── projects
├── ban
│ ├── README.md
│ └── configs
│ │ ├── textvqa
│ │ └── defaults.yaml
│ │ ├── vizwiz
│ │ └── defaults.yaml
│ │ └── vqa2
│ │ └── defaults.yaml
├── butd
│ ├── README.md
│ └── configs
│ │ ├── coco
│ │ ├── beam_search.yaml
│ │ ├── defaults.yaml
│ │ └── nucleus_sampling.yaml
│ │ ├── conceptual_captions
│ │ ├── beam_search.yaml
│ │ ├── defaults.yaml
│ │ └── nucleus_sampling.yaml
│ │ └── textcaps
│ │ ├── beam_search.yaml
│ │ ├── defaults.yaml
│ │ └── eval_pretrained_coco_model.yaml
├── hateful_memes
│ ├── README.md
│ ├── configs
│ │ ├── concat_bert
│ │ │ └── defaults.yaml
│ │ ├── concat_bow
│ │ │ └── defaults.yaml
│ │ ├── late_fusion
│ │ │ └── defaults.yaml
│ │ ├── mmbt
│ │ │ ├── defaults.yaml
│ │ │ └── with_features.yaml
│ │ ├── mmf_transformer
│ │ │ └── defaults.yaml
│ │ ├── unimodal
│ │ │ ├── bert.yaml
│ │ │ ├── image.yaml
│ │ │ ├── text.yaml
│ │ │ └── with_features.yaml
│ │ ├── vilbert
│ │ │ ├── defaults.yaml
│ │ │ ├── direct.yaml
│ │ │ └── from_cc.yaml
│ │ └── visual_bert
│ │ │ ├── defaults.yaml
│ │ │ ├── direct.yaml
│ │ │ └── from_coco.yaml
│ └── fine_grained
│ │ ├── README.md
│ │ └── configs
│ │ └── visual_bert
│ │ ├── attack_vectors.yaml
│ │ ├── defaults.yaml
│ │ ├── hateful_pc_attack.yaml
│ │ ├── multilabel.yaml
│ │ ├── pc_attack.yaml
│ │ └── protected_groups.yaml
├── krisp
│ ├── README.md
│ ├── configs
│ │ ├── krisp
│ │ │ ├── okvqa
│ │ │ │ ├── conceptnet_only.yaml
│ │ │ │ ├── dbpedia_only.yaml
│ │ │ │ ├── defaults.yaml
│ │ │ │ ├── haspart_only.yaml
│ │ │ │ ├── okvqav10.yaml
│ │ │ │ ├── okvqav10_fromfullpretrain.yaml
│ │ │ │ ├── randomgraph.yaml
│ │ │ │ ├── train_val.yaml
│ │ │ │ ├── train_val_cnonly.yaml
│ │ │ │ ├── train_val_dbonly.yaml
│ │ │ │ ├── train_val_hponly.yaml
│ │ │ │ ├── train_val_okvqav10.yaml
│ │ │ │ ├── train_val_okvqav10_fromfullpretrain.yaml
│ │ │ │ ├── train_val_random.yaml
│ │ │ │ ├── train_val_vgonly.yaml
│ │ │ │ └── visualgenome_only.yaml
│ │ │ └── vqa2
│ │ │ │ └── krisp_pretrain.yaml
│ │ └── visual_bert
│ │ │ ├── masked_coco
│ │ │ └── okvqa_safe.yaml
│ │ │ ├── masked_vqa2
│ │ │ └── okvqa_safe.yaml
│ │ │ ├── okvqa
│ │ │ ├── defaults.yaml
│ │ │ ├── defaults_v10.yaml
│ │ │ ├── train_val.yaml
│ │ │ └── train_val_okvqav10.yaml
│ │ │ └── vqa2
│ │ │ └── defaults_okvqasafe.yaml
│ ├── graphnetwork_module.py
│ └── requirements.txt
├── lorra
│ ├── README.md
│ └── configs
│ │ ├── textvqa
│ │ └── defaults.yaml
│ │ ├── vizwiz
│ │ └── defaults.yaml
│ │ └── vqa2
│ │ ├── defaults.yaml
│ │ ├── train_val.yaml
│ │ └── train_val_resnet_only.yaml
├── lxmert
│ ├── README.md
│ └── configs
│ │ ├── coco
│ │ ├── masked.yaml
│ │ └── pretrain.yaml
│ │ ├── defaults.yaml
│ │ ├── gqa
│ │ ├── masked.yaml
│ │ └── pretrain.yaml
│ │ ├── pretrain.yaml
│ │ ├── visual_genome
│ │ ├── masked.yaml
│ │ └── pretrain.yaml
│ │ └── vqa2
│ │ ├── defaults.yaml
│ │ ├── masked.yaml
│ │ └── pretrain.yaml
├── m4c
│ ├── README.md
│ ├── configs
│ │ ├── ocrvqa
│ │ │ └── defaults.yaml
│ │ ├── stvqa
│ │ │ └── defaults.yaml
│ │ └── textvqa
│ │ │ ├── defaults.yaml
│ │ │ ├── joint_with_stvqa.yaml
│ │ │ └── ocr_ml.yaml
│ └── scripts
│ │ ├── __init__.py
│ │ └── extract_ocr_frcn_feature.py
├── m4c_captioner
│ ├── README.md
│ ├── configs
│ │ ├── butd
│ │ │ └── textcaps
│ │ └── m4c_captioner
│ │ │ ├── coco
│ │ │ ├── defaults.yaml
│ │ │ └── eval_on_textcaps.yaml
│ │ │ └── textcaps
│ │ │ ├── defaults.yaml
│ │ │ ├── joint_with_coco.yaml
│ │ │ ├── with_caffe2_feat.yaml
│ │ │ └── without_ocr.yaml
│ └── scripts
│ │ ├── __init__.py
│ │ ├── coco_eval.py
│ │ └── textcaps_eval.py
├── mmbt
│ ├── README.md
│ └── configs
│ │ ├── hateful_memes
│ │ ├── defaults.yaml
│ │ ├── hateful_with_refiner.yaml
│ │ └── with_features.yaml
│ │ ├── masked_coco
│ │ └── defaults.yaml
│ │ ├── mmimdb
│ │ ├── defaults.yaml
│ │ ├── paper_ablations_reducedlabel.yaml
│ │ └── with_features.yaml
│ │ ├── okvqa
│ │ └── with_images.yaml
│ │ └── vqa2
│ │ └── with_raw_images.yaml
├── mmf_transformer
│ ├── configs
│ │ ├── airstore
│ │ │ └── masked_coco.yaml
│ │ ├── charades
│ │ │ └── direct.yaml
│ │ ├── hateful_memes
│ │ │ ├── defaults.yaml
│ │ │ └── hateful_with_refiner.yaml
│ │ ├── masked_coco
│ │ │ ├── defaults.yaml
│ │ │ └── pretrain_itm.yaml
│ │ ├── okvqa
│ │ │ └── defaults.yaml
│ │ └── vqa2
│ │ │ └── defaults.yaml
│ └── localized_narratives
│ │ └── masked.yaml
├── movie_mcan
│ ├── README.md
│ └── configs
│ │ └── vqa2
│ │ ├── defaults.yaml
│ │ └── e2e.yaml
├── others
│ ├── cnn_lstm
│ │ ├── clevr
│ │ │ └── defaults.yaml
│ │ └── hateful_memes
│ │ │ └── defaults.yaml
│ ├── concat_bert
│ │ └── hateful_memes
│ │ │ └── defaults.yaml
│ ├── concat_bow
│ │ └── hateful_memes
│ │ │ └── defaults.yaml
│ ├── late_fusion
│ │ └── hateful_memes
│ │ │ └── defaults.yaml
│ ├── mmf_bert
│ │ └── configs
│ │ │ ├── masked_coco
│ │ │ ├── defaults.yaml
│ │ │ ├── pretrain.yaml
│ │ │ └── pretrain_joint_vqa2.yaml
│ │ │ ├── masked_conceptual_captions
│ │ │ ├── defaults.yaml
│ │ │ └── pretrain.yaml
│ │ │ ├── masked_vqa2
│ │ │ ├── defaults.yaml
│ │ │ └── pretrain.yaml
│ │ │ ├── visual_entailment
│ │ │ └── defaults.yaml
│ │ │ ├── vizwiz
│ │ │ └── defaults.yaml
│ │ │ └── vqa2
│ │ │ └── defaults.yaml
│ └── unimodal
│ │ └── configs
│ │ └── hateful_memes
│ │ ├── bert.yaml
│ │ ├── image.yaml
│ │ ├── text.yaml
│ │ └── with_features.yaml
├── pretrain_vl_right
│ ├── README.md
│ └── configs
│ │ ├── vilbert
│ │ ├── masked_coco
│ │ │ ├── defaults.yaml
│ │ │ ├── fifty_pc.yaml
│ │ │ ├── full.yaml
│ │ │ └── ten_pc.yaml
│ │ ├── masked_conceptual_captions
│ │ │ ├── defaults.yaml
│ │ │ ├── full.yaml
│ │ │ ├── full_coco_generated.yaml
│ │ │ ├── half.yaml
│ │ │ ├── half_coco_generated.yaml
│ │ │ ├── small.yaml
│ │ │ ├── small_coco_generated.yaml
│ │ │ ├── small_fifty_pc.yaml
│ │ │ └── small_ten_pc.yaml
│ │ └── masked_vqa2
│ │ │ ├── defaults.yaml
│ │ │ ├── fifty_pc.yaml
│ │ │ ├── full.yaml
│ │ │ └── ten_pc.yaml
│ │ └── visual_bert
│ │ ├── masked_coco
│ │ ├── defaults.yaml
│ │ ├── fifty_pc.yaml
│ │ ├── full.yaml
│ │ ├── full_train_val.yaml
│ │ └── ten_pc.yaml
│ │ ├── masked_conceptual_captions
│ │ ├── defaults.yaml
│ │ ├── full.yaml
│ │ ├── full_coco_generated.yaml
│ │ ├── half.yaml
│ │ ├── half_coco_generated.yaml
│ │ ├── small.yaml
│ │ ├── small_coco_generated.yaml
│ │ ├── small_fifty_pc.yaml
│ │ └── small_ten_pc.yaml
│ │ └── masked_vqa2
│ │ ├── defaults.yaml
│ │ ├── fifty_pc.yaml
│ │ ├── full.yaml
│ │ ├── full_train_val.yaml
│ │ └── ten_pc.yaml
├── pythia
│ ├── README.md
│ └── configs
│ │ ├── masked_q_vqa2
│ │ └── defaults.yaml
│ │ ├── multihead
│ │ └── defaults.yaml
│ │ ├── textvqa
│ │ └── defaults.yaml
│ │ ├── visual_genome
│ │ └── defaults.yaml
│ │ ├── vizwiz
│ │ └── defaults.yaml
│ │ └── vqa2
│ │ ├── 12k_iterations_without_resnet.yaml
│ │ ├── debug.yaml
│ │ ├── defaults.yaml
│ │ ├── resnet_only.yaml
│ │ ├── train_val.yaml
│ │ └── train_val_resnet_only.yaml
├── unit
│ ├── README.md
│ └── configs
│ │ ├── all_8_datasets
│ │ ├── separate_dec.yaml
│ │ ├── shared_dec.yaml
│ │ └── shared_dec_without_task_embedding.yaml
│ │ ├── coco
│ │ ├── single_task.yaml
│ │ └── single_task_without_task_embedding.yaml
│ │ ├── coco_vg_vqa2
│ │ ├── separate_dec.yaml
│ │ └── shared_dec.yaml
│ │ ├── coco_vqa2
│ │ ├── separate_dec.yaml
│ │ └── shared_dec.yaml
│ │ ├── vg
│ │ └── single_task.yaml
│ │ ├── vg_vqa2
│ │ ├── separate_dec.yaml
│ │ └── shared_dec.yaml
│ │ ├── visual_entailment_dataset_cfg.yaml
│ │ ├── vqa2
│ │ └── single_task.yaml
│ │ └── vqa2_dataset_cfg.yaml
├── uniter
│ ├── README.md
│ └── configs
│ │ ├── masked_coco
│ │ └── defaults.yaml
│ │ └── vqa2
│ │ └── defaults.yaml
├── vilbert
│ ├── README.md
│ └── configs
│ │ ├── hateful_memes
│ │ ├── defaults.yaml
│ │ ├── direct.yaml
│ │ └── from_cc.yaml
│ │ ├── masked_coco
│ │ ├── defaults.yaml
│ │ ├── pretrain.yaml
│ │ └── pretrain_train_val.yaml
│ │ ├── masked_conceptual_captions
│ │ ├── defaults.yaml
│ │ └── pretrain.yaml
│ │ ├── masked_vqa2
│ │ ├── defaults.yaml
│ │ ├── pretrain.yaml
│ │ └── pretrain_train_val.yaml
│ │ ├── mmimdb
│ │ ├── defaults.yaml
│ │ └── pretrain.yaml
│ │ ├── nlvr2
│ │ └── defaults.yaml
│ │ ├── visual_entailment
│ │ └── defaults.yaml
│ │ ├── vizwiz
│ │ └── defaults.yaml
│ │ └── vqa2
│ │ ├── defaults.yaml
│ │ └── train_val.yaml
├── vilt
│ ├── README.md
│ └── configs
│ │ ├── masked_coco
│ │ ├── defaults.yaml
│ │ └── pretrain.yaml
│ │ └── vqa2
│ │ ├── defaults.yaml
│ │ ├── vit_b16_224.yaml
│ │ └── vit_b32_384.yaml
├── vinvl
│ ├── README.md
│ └── configs
│ │ └── vqa2
│ │ └── defaults.yaml
└── visual_bert
│ ├── README.md
│ └── configs
│ ├── gqa
│ └── defaults.yaml
│ ├── hateful_memes
│ ├── defaults.yaml
│ ├── direct.yaml
│ └── from_coco.yaml
│ ├── localized_narratives
│ ├── defaults.yaml
│ └── pretrain.yaml
│ ├── masked_coco
│ ├── defaults.yaml
│ ├── pretrain.yaml
│ └── pretrain_train_val.yaml
│ ├── masked_conceptual_captions
│ ├── defaults.yaml
│ └── pretrain.yaml
│ ├── masked_gqa
│ └── defaults.yaml
│ ├── masked_sbu
│ ├── defaults.yaml
│ └── pretrain.yaml
│ ├── masked_vqa2
│ ├── defaults.yaml
│ ├── pretrain.yaml
│ └── pretrain_train_val.yaml
│ ├── mmimdb
│ ├── defaults.yaml
│ └── pretrain.yaml
│ ├── nlvr2
│ └── defaults.yaml
│ ├── visual_entailment
│ ├── defaults.yaml
│ └── train_val.yaml
│ ├── vizwiz
│ ├── defaults.yaml
│ └── train_val.yaml
│ └── vqa2
│ ├── defaults.yaml
│ ├── train_val.yaml
│ └── with_raw_images.yaml
├── pyproject.toml
├── requirements.txt
├── setup.py
├── tests
├── __init__.py
├── common
│ ├── __init__.py
│ ├── test_batch_collator.py
│ ├── test_meter.py
│ ├── test_report.py
│ └── test_sample.py
├── configs
│ ├── __init__.py
│ ├── test_configs_for_keys.py
│ └── test_zoo_urls.py
├── conftest.py
├── data
│ ├── user_dir
│ │ ├── __init__.py
│ │ ├── configs
│ │ │ ├── always_one.yaml
│ │ │ ├── experiment.yaml
│ │ │ └── simple.yaml
│ │ ├── datasets
│ │ │ ├── __init__.py
│ │ │ └── always_one.py
│ │ └── models
│ │ │ ├── __init__.py
│ │ │ └── simple.py
│ └── vocab.txt
├── datasets
│ ├── __init__.py
│ ├── test_base_dataset.py
│ ├── test_bert_processors.py
│ ├── test_iteration_strategies.py
│ ├── test_mmf_dataset_builder.py
│ ├── test_multi_datamodule.py
│ ├── test_multi_dataset_loader.py
│ ├── test_prediction_processors.py
│ └── test_processors.py
├── models
│ ├── __init__.py
│ ├── interfaces
│ │ ├── __init__.py
│ │ └── test_interfaces.py
│ ├── test_albef.py
│ ├── test_cnn_lstm.py
│ ├── test_mmbt.py
│ ├── test_mmf_transformer.py
│ ├── test_uniter.py
│ ├── test_vilbert.py
│ ├── test_vilt.py
│ ├── test_vinvl.py
│ ├── test_visual_bert.py
│ └── transformers
│ │ ├── __init__.py
│ │ ├── test_heads.py
│ │ └── test_heads_dict.py
├── modules
│ ├── __init__.py
│ ├── test_encoders.py
│ ├── test_fusions.py
│ ├── test_hf_layers.py
│ ├── test_layers.py
│ ├── test_losses.py
│ ├── test_metrics.py
│ ├── test_optimizers.py
│ ├── test_poolers.py
│ └── test_vit.py
├── test_utils.py
├── trainers
│ ├── __init__.py
│ ├── callbacks
│ │ ├── __init__.py
│ │ ├── test_logistics.py
│ │ ├── test_lr_scheduler.py
│ │ └── test_user_callback.py
│ ├── lightning
│ │ ├── __init__.py
│ │ ├── lightning_trainer_mock.py
│ │ ├── test_checkpoint.py
│ │ ├── test_grad_accumulate.py
│ │ ├── test_grad_clipping.py
│ │ ├── test_logging.py
│ │ ├── test_loop_conditions.py
│ │ ├── test_loss.py
│ │ ├── test_lr_schedule.py
│ │ └── test_validation.py
│ ├── test_device.py
│ ├── test_eval_loop.py
│ ├── test_fp16.py
│ ├── test_sharded_ddp.py
│ ├── test_trainer_mocks.py
│ ├── test_training_loop.py
│ └── test_utils.py
└── utils
│ ├── __init__.py
│ ├── test_checkpoint.py
│ ├── test_configuration.py
│ ├── test_distributed.py
│ ├── test_download.py
│ ├── test_env.py
│ ├── test_file_io.py
│ ├── test_general.py
│ ├── test_logger.py
│ ├── test_model.py
│ ├── test_patch.py
│ ├── test_quality_checks.py
│ ├── test_text.py
│ ├── test_timer.py
│ └── test_visualize.py
├── tools
├── __init__.py
├── scripts
│ ├── __init__.py
│ ├── bert
│ │ ├── extract_bert.sh
│ │ └── extract_bert_embeddings.py
│ ├── coco
│ │ └── coco_caption_eval.py
│ ├── features
│ │ ├── extract_features_vinvl.py
│ │ ├── extract_features_vmb.py
│ │ ├── extract_resnet152_feat.py
│ │ ├── extraction_utils.py
│ │ ├── frcnn
│ │ │ ├── extract_features_frcnn.py
│ │ │ ├── frcnn_utils.py
│ │ │ ├── modeling_frcnn.py
│ │ │ └── processing_image.py
│ │ └── lmdb_conversion.py
│ ├── gqa
│ │ ├── README.md
│ │ ├── convert_gqa_to_vqa.py
│ │ └── extract_vocabulary.py
│ ├── tests
│ │ └── generate_test_data.py
│ └── visual_dialog
│ │ ├── build_imdb.py
│ │ └── extract_vocabulary.py
└── sweeps
│ ├── README.md
│ ├── lib
│ ├── __init__.py
│ └── slurm.py
│ └── sweep_visual_bert.py
└── website
├── .eslintignore
├── .eslintrc.js
├── .gitignore
├── .prettierignore
├── .prettierrc
├── .stylelintrc.js
├── README.md
├── build_docs.sh
├── docs
├── challenges
│ ├── hateful_memes_challenge.md
│ ├── textvqa_challenge.md
│ └── vqa_challenge.md
├── getting_started
│ ├── faqs.md
│ ├── features.md
│ ├── installation.mdx
│ ├── quickstart.md
│ └── video_overview.md
├── notes
│ ├── concepts.md
│ ├── configuration.md
│ ├── dataset_zoo.md
│ ├── logging.md
│ ├── model_zoo.md
│ ├── pretrained_models.md
│ ├── projects.md
│ └── training_tricks.md
├── projects
│ ├── butd.md
│ ├── m4c.md
│ ├── m4c_captioner.md
│ ├── movie_mcan.md
│ ├── unit.md
│ ├── uniter.md
│ ├── vilt.md
│ └── vinvl.md
└── tutorials
│ ├── checkpointing.md
│ ├── concat_bert_tutorial.md
│ ├── dataset.md
│ ├── image_feature_extraction.md
│ ├── image_feature_extraction_vinvl.md
│ ├── losses.md
│ ├── metrics.md
│ ├── processors.md
│ ├── pytorchvideo.md
│ └── slurm.md
├── docusaurus.config.js
├── package.json
├── sidebars.js
├── src
├── css
│ └── custom.css
└── pages
│ ├── api_redirect
│ └── index.js
│ ├── index.js
│ └── styles.module.css
├── static
├── .circleci
│ └── config.yml
├── .nojekyll
├── CNAME
└── img
│ ├── banner_logo.svg
│ ├── boilerplate.svg
│ ├── boilerplate_white.svg
│ ├── favicon.png
│ ├── logo.png
│ ├── logo.svg
│ ├── logo_white_f.png
│ ├── logo_white_f.svg
│ ├── logo_white_text.svg
│ ├── oss_logo.png
│ ├── puzzle_pieces.svg
│ ├── puzzle_pieces_white.svg
│ ├── pytorch_logo.svg
│ ├── pytorch_logo_white.svg
│ ├── undraw_docusaurus_react.svg
│ └── undraw_docusaurus_tree.svg
└── yarn.lock
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*.py]
4 | charset = utf-8
5 | trim_trailing_whitespace = true
6 | end_of_line = lf
7 | insert_final_newline = true
8 | indent_style = space
9 | indent_size = 4
10 |
11 | [*.md]
12 | trim_trailing_whitespace = false
13 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | # This is an example .flake8 config used when developing *Black* itself.
2 |
3 | [flake8]
4 | max-line-length = 88
5 | max-complexity = 18
6 | select = B,C,E,F,W,T4,B9
7 | ignore = E203, E266, C901, C408, W503
8 |
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 |
4 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
5 | Please read the [full text](https://code.fb.com/codeofconduct/)
6 | so that you can understand what actions will and will not be tolerated.
7 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | # require an issue template to be chosen
2 | blank_issues_enabled: false
3 |
4 | contact_links:
5 | - name: MMF Documentation
6 | url: https://mmf.sh/docs
7 | about: Check if your issue/documentation is already answered in docs
8 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "\U0001F680Feature Request"
3 | about: Submit a proposal/request for a new MMF feature
4 |
5 | ---
6 |
7 | ## 🚀 Feature
8 |
9 |
10 | ## Motivation
11 |
12 |
13 |
14 | ## Pitch
15 |
16 |
17 |
18 | ## Alternatives
19 |
20 |
21 |
22 | ## Additional context
23 |
24 |
25 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/questions-help-support.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "❓Questions/Help/Support"
3 | about: Do you need support?
4 |
5 | ---
6 |
7 | ## ❓ Questions and Help
8 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | Thanks for your contribution!
2 |
3 | If you're sending a large PR (e.g., >50 lines), please open an issue first about
4 | the feature/bug, and indicate how you want to contribute.
5 |
6 | Use [contributing guidelines](https://github.com/facebookresearch/mmf/tree/main/.github/CONTRIBUTING.md) before opening up the PR to follow MMF style guidelines.
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.log
2 | *.err
3 | *.pyc
4 | *.swp
5 | .idea/*
6 | **/__pycache__/*
7 | **/output/*
8 | data/.DS_Store
9 | docs/build
10 | results/*
11 | build
12 | dist
13 | boards/*
14 | *.egg-info/
15 | checkpoint
16 | *.pth
17 | *.ckpt
18 | *_cache
19 | .cache
20 | data
21 | save
22 | *.eggs
23 | .eggs
24 | eggs/
25 | *.egg
26 | .DS_Store
27 | .vscode
28 | .vscode/*
29 | *.so
30 | *-checkpoint.ipynb
31 | !tests/data
32 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include LICENSE
3 | include NOTICES
4 | recursive-include mmf/configs/ *.yaml
5 | recursive-include projects/ *.yaml
6 |
--------------------------------------------------------------------------------
/PACKAGE:
--------------------------------------------------------------------------------
1 | load("@fbcode_macros//build_defs/lib:third_party.bzl", "third_party")
2 |
3 | third_party.gen_overrides({"pypi/transformers": "3.4.0-transitional"})
4 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | # Needed to ignore pytorch_sphinx_theme requirement clone
2 | src
3 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = mmf
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/license_header.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | recommonmark==0.5.0
2 | sphinx
3 | sphinx_rtd_theme==0.4.3
4 | sphinxcontrib-programoutput==0.16
5 | -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
6 |
--------------------------------------------------------------------------------
/docs/source/_static/images/chevron-right-orange.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
6 | Page 1
7 | Created with Sketch.
8 |
9 |
10 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/docs/source/_static/images/chevron_blue.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
7 | Page 1
8 | Created with Sketch.
9 |
10 |
11 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/docs/source/_static/images/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/docs/source/_static/images/favicon.png
--------------------------------------------------------------------------------
/docs/source/_static/images/mmf_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/docs/source/_static/images/mmf_logo.png
--------------------------------------------------------------------------------
/docs/source/_static/images/view-page-source-icon.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/docs/source/_static/js/ga.js:
--------------------------------------------------------------------------------
// Google Analytics bootstrap: reuse (or create) the shared dataLayer
// queue and define the standard gtag() helper that pushes events onto it.
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());

// Register this site's GA tracking ID.
gtag('config', 'UA-135079836-3');
6 |
--------------------------------------------------------------------------------
/docs/source/_static/js/redirect.js:
--------------------------------------------------------------------------------
// Redirect for older pythia documentation: pages still served from
// readthedocs are forwarded to the current API docs site.
(function(l) {
  // Use the injected location object instead of reaching for the global
  // again; previously the parameter `l` was accepted but never used.
  if (l.href.indexOf('readthedocs') !== -1) {
    l.href = "https://mmf.sh/api";
  }
}(window.location));
7 |
--------------------------------------------------------------------------------
/docs/source/_templates/theme_variables.jinja:
--------------------------------------------------------------------------------
1 | {%-
2 | set external_urls = {
3 | 'github': 'https://github.com/facebookresearch/mmf',
4 | 'github_issues': 'https://github.com/facebookresearch/mmf/issues',
5 | 'contributing': 'https://github.com/facebookresearch/mmf/blob/main/CONTRIBUTING.md',
6 | 'api': 'https://mmf.sh/api',
7 | 'docs': 'https://mmf.sh/docs',
8 | 'previous_pytorch_versions': 'https://mmf.sh/previous-versions/',
9 | 'home': 'https://mmf.sh/',
10 | 'get_started': 'https://mmf.sh/docs',
11 | 'features': 'https://mmf.sh/docs/getting_started/features',
12 | 'brand_guidelines': 'https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf'
13 | }
14 | -%}
15 | {%-
16 | set og = {
17 | 'description': 'API docs for MMF. MMF is a modular framework powered by PyTorch for multimodal vision and language research from Facebook AI Research'
18 | }
19 | -%}
20 |
--------------------------------------------------------------------------------
/docs/source/lib/common/registry.rst:
--------------------------------------------------------------------------------
1 | common.registry
2 | ===============
3 |
4 | .. automodule:: mmf.common.registry
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/lib/common/sample.rst:
--------------------------------------------------------------------------------
1 | common.sample
2 | ===============
3 |
4 | .. automodule:: mmf.common.sample
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/lib/datasets/base_dataset.rst:
--------------------------------------------------------------------------------
1 | datasets.base_dataset
2 | =====================
3 |
4 | .. automodule:: mmf.datasets.base_dataset
5 | :members:
6 | :private-members:
7 |
--------------------------------------------------------------------------------
/docs/source/lib/datasets/base_dataset_builder.rst:
--------------------------------------------------------------------------------
1 | datasets.base_dataset_builder
2 | =============================
3 |
4 | .. automodule:: mmf.datasets.base_dataset_builder
5 | :members:
6 | :private-members:
7 |
--------------------------------------------------------------------------------
/docs/source/lib/datasets/processors.rst:
--------------------------------------------------------------------------------
1 | datasets.processors
2 | ===================
3 |
4 | .. automodule:: mmf.datasets.processors.processors
5 | :members:
6 | :private-members:
7 |
8 | .. automodule:: mmf.datasets.processors.image_processors
9 | :members:
10 | :private-members:
11 |
12 | .. automodule:: mmf.datasets.processors.bert_processors
13 | :members:
14 | :private-members:
15 |
--------------------------------------------------------------------------------
/docs/source/lib/models/base_model.rst:
--------------------------------------------------------------------------------
1 | models.base_model
2 | =================
3 |
4 | .. automodule:: mmf.models.base_model
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/lib/modules/losses.rst:
--------------------------------------------------------------------------------
1 | modules.losses
2 | ===============
3 |
4 | .. automodule:: mmf.modules.losses
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/lib/modules/metrics.rst:
--------------------------------------------------------------------------------
1 | modules.metrics
2 | ===============
3 |
4 | .. automodule:: mmf.modules.metrics
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/lib/utils/text.rst:
--------------------------------------------------------------------------------
1 | utils.text
2 | ===============
3 |
4 | .. automodule:: mmf.utils.text
5 | :members:
6 |
--------------------------------------------------------------------------------
/mmf/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # isort:skip_file
3 | # flake8: noqa: F401
4 | from mmf.utils.patch import patch_transformers
5 |
6 | patch_transformers()
7 |
8 | from mmf import common, datasets, models, modules, utils
9 | from mmf.modules import losses, metrics, optimizers, poolers, schedulers
10 | from mmf.version import __version__
11 |
12 |
13 | __all__ = [
14 | "utils",
15 | "common",
16 | "modules",
17 | "datasets",
18 | "models",
19 | "losses",
20 | "poolers",
21 | "schedulers",
22 | "optimizers",
23 | "metrics",
24 | ]
25 |
--------------------------------------------------------------------------------
/mmf/common/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .meter import Meter
3 | from .registry import registry
4 | from .sample import Sample, SampleList
5 |
6 |
7 | __all__ = ["Sample", "SampleList", "Meter", "registry"]
8 |
--------------------------------------------------------------------------------
/mmf/common/batch_collator.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.sample import convert_batch_to_sample_list
3 |
4 |
class BatchCollator:
    """Callable collator that turns a list of samples into a ``SampleList``
    stamped with the originating dataset's name and type.
    """

    def __init__(self, dataset_name, dataset_type):
        # Remembered so every collated batch can be attributed back to
        # the dataset (and split) it came from.
        self._dataset_name = dataset_name
        self._dataset_type = dataset_type

    def __call__(self, batch):
        """Collate ``batch`` and attach dataset metadata to the result."""
        collated = convert_batch_to_sample_list(batch)
        collated.dataset_name = self._dataset_name
        collated.dataset_type = self._dataset_type
        return collated
15 |
--------------------------------------------------------------------------------
/mmf/common/typings.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from dataclasses import dataclass
3 | from typing import Any, Dict, List
4 |
5 |
@dataclass
class PerSetAttributeType:
    """One list of string values (e.g. file paths) per dataset split."""

    train: List[str]
    val: List[str]
    test: List[str]
11 |
12 |
@dataclass
class ProcessorConfigType:
    """Configuration for a single processor: its ``type`` key and params."""

    type: str
    params: Dict[str, Any]
17 |
18 |
@dataclass
class MMFDatasetConfigType:
    """Typed view of a single dataset entry under ``dataset_config``."""

    # Root directory under which dataset assets are resolved.
    data_dir: str
    # Whether raw images are used for this dataset.
    use_images: bool
    # Whether precomputed features are used for this dataset.
    use_features: bool
    # Zoo keys (e.g. "hateful_memes.defaults") required before building.
    zoo_requirements: List[str]
    # Per-split image sources.
    images: PerSetAttributeType
    # Per-split feature sources.
    features: PerSetAttributeType
    # Per-split annotation files.
    annotations: PerSetAttributeType
    # Mapping from processor name to that processor's configuration.
    processors: Dict[str, ProcessorConfigType]
29 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/clevr/defaults.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | clevr:
3 | data_dir: ${env.data_dir}
4 | data_folder: CLEVR_v1.0
5 | build_attributes:
6 | min_count: 1
7 | split_regex: " "
8 | keep:
9 | - ";"
10 | - ","
11 | remove:
12 | - "?"
13 | - "."
14 | processors:
15 | text_processor:
16 | type: vocab
17 | params:
18 | max_length: 10
19 | vocab:
20 | type: random
21 | vocab_file: vocabs/clevr_question_vocab.txt
22 | preprocessor:
23 | type: simple_sentence
24 | params: {}
25 | answer_processor:
26 | type: multi_hot_answer_from_vocab
27 | params:
28 | num_answers: 1
29 | # Vocab file is relative to [data_dir]/[data_folder]
30 | vocab_file: vocabs/clevr_answer_vocab.txt
31 | preprocessor:
32 | type: simple_word
33 | params: {}
34 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/conceptual_captions/train_small.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | conceptual_captions:
3 | annotations:
4 | train:
5 | - cc/defaults/annotations/train_small.npy
6 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/bert.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | hateful_memes:
3 | processors:
4 | text_processor:
5 | type: bert_tokenizer
6 | params:
7 | tokenizer_config:
8 | type: bert-base-uncased
9 | params:
10 | do_lower_case: true
11 | mask_probability: 0
12 | max_seq_length: 128
13 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/attack_vectors.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./with_features.yaml
3 |
4 | dataset_config:
5 | hateful_memes:
6 | fg_dataset_type: attack
7 | is_multilabel: true
8 | processors:
9 | answer_processor:
10 | type: multi_hot_answer_from_vocab
11 | params:
12 | num_answers: 1
13 | vocab_file: hateful_memes/fine_grained/labels/attack_vocab.txt
14 | preprocessor:
15 | type: simple_word
16 | params: {}
17 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/defaults.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | hateful_memes:
3 | zoo_requirements:
4 | - hateful_memes.defaults
5 | - hateful_memes.fine_grained
6 | annotations:
7 | train:
8 | - hateful_memes/fine_grained/annotations/train_fg.jsonl
9 | val:
10 | - hateful_memes/fine_grained/annotations/dev_seen_fg.jsonl
11 | test:
12 | - hateful_memes/fine_grained/annotations/test_seen_fg.jsonl
13 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/hateful_pc_attack.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./with_features.yaml
3 |
4 | dataset_config:
5 | hateful_memes:
6 | fg_dataset_type: hateful_pc_attack
7 | is_multilabel: true
8 | processors:
9 | answer_processor:
10 | type: multi_hot_answer_from_vocab
11 | params:
12 | num_answers: 1
13 | vocab_file: hateful_memes/fine_grained/labels/hateful_pc_attack_vocab.txt
14 | preprocessor:
15 | type: simple_word
16 | params: {}
17 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/pc_attack.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./with_features.yaml
3 |
4 | dataset_config:
5 | hateful_memes:
6 | fg_dataset_type: pc_attack
7 | is_multilabel: true
8 | processors:
9 | answer_processor:
10 | type: multi_hot_answer_from_vocab
11 | params:
12 | num_answers: 1
13 | vocab_file: hateful_memes/fine_grained/labels/pc_attack_vocab.txt
14 | preprocessor:
15 | type: simple_word
16 | params: {}
17 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/protected_groups.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./with_features.yaml
3 |
4 | dataset_config:
5 | hateful_memes:
6 | fg_dataset_type: pc
7 | is_multilabel: true
8 | processors:
9 | answer_processor:
10 | type: multi_hot_answer_from_vocab
11 | params:
12 | num_answers: 1
13 | vocab_file: hateful_memes/fine_grained/labels/pc_vocab.txt
14 | preprocessor:
15 | type: simple_word
16 | params: {}
17 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/with_features.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | hateful_memes:
6 | use_images: false
7 | use_features: true
8 | # Disable this in your config if you do not need features info
9 | # and are running out of memory
10 | return_features_info: true
11 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/with_features.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | hateful_memes:
3 | use_images: false
4 | use_features: true
5 | # Disable this in your config if you do not need features info
6 | # and are running out of memory
7 | return_features_info: true
8 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/mmimdb/with_features.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | mmimdb:
3 | use_images: false
4 | use_features: true
# Set this to true in your config if you need features info;
# it is kept false here to reduce memory usage
7 | return_features_info: false
8 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/textvqa/with_resnet.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | textvqa:
3 | features:
4 | train:
5 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/defaults/features/open_images/resnet152.lmdb
6 | val:
7 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/defaults/features/open_images/resnet152.lmdb
8 | test:
9 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/defaults/features/open_images/resnet152.lmdb
10 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/vinvl/defaults.yaml:
--------------------------------------------------------------------------------
includes:
- ../vqa2/defaults.yaml

dataset_config:
  vinvl:
    base_dataset_name: vqa2
    # NOTE(review): hard-coded user-specific absolute path; override
    # label_map in your own config before using this dataset.
    label_map: /private/home/ryanjiang/winoground/pretrained_models/VG-SGG-dicts-vgoi6-clipped.json
    base_dataset: ${dataset_config.vqa2}
    processors:
      text_processor:
        type: vinvl_text_tokenizer
        params:
          mask_probability: 0
14 |
--------------------------------------------------------------------------------
/mmf/configs/datasets/vqa2/with_raw_images.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | vqa2:
3 | use_images: true
4 | use_features: false
5 | processors:
6 | image_processor:
7 | type: torchvision_transforms
8 | params:
9 | transforms:
10 | - type: Resize
11 | params:
12 | size: [256, 256]
13 | - type: CenterCrop
14 | params:
15 | size: [224, 224]
16 | - ToTensor
17 | - GrayScaleTo3Channels
18 | - type: Normalize
19 | params:
20 | mean: [0.46777044, 0.44531429, 0.40661017]
21 | std: [0.12221994, 0.12145835, 0.14380469]
22 |
--------------------------------------------------------------------------------
/mmf/configs/models/ban/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | ban:
3 | losses:
4 | - type: logit_bce
5 | text_embedding:
6 | num_hidden: 1280
7 | vocab_size: 1280
8 | emb_size: 300
9 | num_layers: 1
10 | dropout: 0.0
11 | bidirectional: False
12 | rnn_type: 'GRU'
13 | bilinear_attention:
14 | bc_net:
15 | k: 1
16 | dropout: [0.2, 0.5]
17 | h_out:
18 | fc_net:
19 | dims: 600
20 | activation:
21 | dropout: 0.2
22 | gamma: 4
23 | visual_feat_dim: 2048
24 | classifier:
25 | # out dim will be taken from registry as set by dataset builder
26 | hidden_size: 600
27 | dropout: 0.5
28 |
--------------------------------------------------------------------------------
/mmf/configs/models/butd/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | butd: &butd
3 | model_data_dir: ${env.data_dir}
4 | losses:
5 | - type: caption_cross_entropy
6 | classifier:
7 | type: language_decoder
8 | params:
9 | dropout: 0.5
10 | hidden_dim: 1024
11 | feature_dim: 2048
12 | fc_bias_init: 0
13 | image_feature_embeddings:
14 | - modal_combine:
15 | type: top_down_attention_lstm
16 | params:
17 | dropout: 0.5
18 | hidden_dim: 1024
19 | attention_dim: 1024
20 | normalization: softmax
21 | transform:
22 | type: linear
23 | params:
24 | out_dim: 1
25 | image_feature_dim: 2048
26 | embedding_dim: 300
27 | image_feature_encodings:
28 | - type: finetune_faster_rcnn_fpn_fc7
29 | params:
30 | bias_file: models/detectron.defaults/fc7_b.pkl
31 | weights_file: models/detectron.defaults/fc7_w.pkl
32 | model_data_dir: ${model_config.butd.model_data_dir}
33 | inference:
34 | type: greedy
35 |
--------------------------------------------------------------------------------
/mmf/configs/models/cnn_lstm/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | cnn_lstm:
3 | losses:
4 | - type: logit_bce
5 | text_embedding:
6 | embedding_dim: 20
7 | lstm:
8 | input_size: 20
9 | hidden_size: 50
10 | bidirectional: true
11 | batch_first: true
12 | cnn:
13 | layers:
14 | input_dims: [3, 64, 128, 128, 64, 64]
15 | output_dims: [64, 128, 128, 64, 64, 10]
16 | kernel_sizes: [7, 5, 5, 5, 5, 1]
17 | classifier:
18 | type: mlp
19 | params:
20 | in_dim: 450
21 | out_dim: 2
22 |
--------------------------------------------------------------------------------
/mmf/configs/models/fusions/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./concat_bert.yaml
3 |
--------------------------------------------------------------------------------
/mmf/configs/models/lxmert/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/lxmert/defaults.yaml
3 |
--------------------------------------------------------------------------------
/mmf/configs/models/m4c/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | m4c:
3 | lr_scale_frcn: 0.1
4 | lr_scale_text_bert: 0.1
5 | lr_scale_mmt: 1.0 # no scaling
6 | text_bert_init_from_bert_base: true
7 | text_bert:
8 | num_hidden_layers: 3
9 | obj:
10 | mmt_in_dim: 2048
11 | dropout_prob: 0.1
12 | ocr:
13 | mmt_in_dim: 3002 # 300 (FastText) + 604 (PHOC) + 2048 (Faster R-CNN) + 50 (all zeros; legacy)
14 | dropout_prob: 0.1
15 | mmt:
16 | hidden_size: 768
17 | num_hidden_layers: 4
18 | classifier:
19 | type: linear
20 | ocr_max_num: 50
21 | ocr_ptr_net:
22 | hidden_size: 768
23 | query_key_size: 768
24 | params: {}
25 | model_data_dir: ${env.data_dir}
26 | losses:
27 | - type: m4c_decoding_bce_with_mask
28 |
--------------------------------------------------------------------------------
/mmf/configs/models/m4c_captioner/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | m4c_captioner:
3 | lr_scale_frcn: 0.1
4 | lr_scale_text_bert: 0.1
5 | lr_scale_mmt: 1.0 # no scaling
6 | text_bert_init_from_bert_base: true
7 | text_bert:
8 | num_hidden_layers: 3
9 | obj:
10 | mmt_in_dim: 2048
11 | dropout_prob: 0.1
12 | ocr:
13 | mmt_in_dim: 3002 # 300 (FastText) + 604 (PHOC) + 2048 (Faster R-CNN) + 50 (all zeros; legacy)
14 | dropout_prob: 0.1
15 | mmt:
16 | hidden_size: 768
17 | num_hidden_layers: 4
18 | classifier:
19 | type: linear
20 | ocr_max_num: 50
21 | ocr_ptr_net:
22 | hidden_size: 768
23 | query_key_size: 768
24 | params: {}
25 | model_data_dir: ${env.data_dir}
26 | losses:
27 | - type: m4c_decoding_bce_with_mask
28 | remove_unk_in_pred: true
29 |
--------------------------------------------------------------------------------
/mmf/configs/models/mmbt/classification.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | mmbt:
3 | training_head_type: classification
4 | num_labels: 2
5 | losses:
6 | - type: cross_entropy
7 |
--------------------------------------------------------------------------------
/mmf/configs/models/mmbt/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/mmf/configs/models/mmbt/with_features.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | mmbt:
3 | model_data_dir: ${env.data_dir}
4 | direct_features_input: true
5 | modal_encoder:
6 | type: finetune_faster_rcnn_fpn_fc7
7 | params:
8 | in_dim: 2048
9 | bias_file: models/detectron.defaults/fc7_b.pkl
10 | weights_file: models/detectron.defaults/fc7_w.pkl
11 | model_data_dir: ${model_config.mmbt.model_data_dir}
12 |
--------------------------------------------------------------------------------
/mmf/configs/models/mmf_transformer/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/mmf_transformer/defaults.yaml
3 |
4 | model_config:
5 | mmf_transformer:
6 | heads:
7 | - type: mlm
8 | freeze: false
9 | lr_multiplier: 1.0
10 | # default for bert base
11 | hidden_size: 768
12 | # default vocab size for bert base
13 | vocab_size: 30522
14 |
--------------------------------------------------------------------------------
/mmf/configs/models/movie_mcan/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | movie_mcan:
3 | model_data_dir: ${env.data_dir}
4 | classifier:
5 | type: triple_linear
6 | params: {}
7 | image_feature_embeddings:
8 | type: two_branches
9 | params:
10 | hidden_dim: 1024
11 | cond_dim: 1024
12 | num_attn: 8
13 | dropout: 0.1
14 | num_layers: 6
15 | cbn_num_layers: 4
16 | image_feature_dim: 2048
17 | image_feature_encodings:
18 | type: default
19 | params:
20 | model_data_dir: ${model_config.movie_mcan.model_data_dir}
21 | cond_features: 1024
22 | in_dim: ${model_config.movie_mcan.image_feature_dim}
23 | text_embeddings:
24 | type: mcan
25 | params:
26 | hidden_dim: 1024
27 | embedding_dim: 300
28 | num_attn: 8
29 | dropout: 0.1
30 | num_layers: 6
31 | num_attn_pool: 1
32 | num_feat: 2
33 | model_data_dir: ${model_config.movie_mcan.model_data_dir}
34 |
--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/bert.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | unimodal_text:
3 | bert_model_name: bert-base-uncased
4 | text_hidden_size: 768
5 | num_labels: 2
6 | text_encoder:
7 | type: transformer
8 | params:
9 | bert_model_name: ${model_config.unimodal_text.bert_model_name}
10 | hidden_size: 768
11 | num_hidden_layers: 12
12 | num_attention_heads: 12
13 | output_attentions: false
14 | output_hidden_states: false
15 |
16 | classifier:
17 | params:
18 | in_dim: 768
19 |
--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/image.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | unimodal_image:
3 | # Either pretraining or classification
4 | direct_features_input: false
5 | freeze_base: false
6 | finetune_lr_multiplier: 1
7 | # Dimension of the embedding finally returned by the modal encoder
8 | modal_hidden_size: 2048
9 | # Used when classification head is activated
10 | num_labels: 2
11 | modal_encoder:
12 | type: resnet152
13 | params:
14 | pretrained: true
15 | pool_type: avg
16 | num_output_features: 1
17 |
18 | classifier:
19 | type: mlp
20 | params:
21 | in_dim: 2048
22 | out_dim: 2
23 | hidden_dim: 768
24 | num_layers: 0
25 |
--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/text.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | unimodal_text:
3 | # Either pretraining or classification
4 | bert_model_name: bert-base-uncased
5 | freeze_base: false
6 | finetune_lr_multiplier: 1
7 | # Dimension of the embedding finally returned by the text encoder
8 | text_hidden_size: 300
9 | # Used when classification head is activated
10 | num_labels: 2
11 | text_encoder:
12 | type: embedding
13 | params:
14 | operator: sum
15 | embedding_params:
16 | type: vocab
17 | params:
18 | type: intersected
19 | embedding_name: glove.6B.300d
20 | embedding_dim: 300
21 | data_dir: ${env.data_dir}
22 | vocab_file: vocabs/vocabulary_100k.txt
23 |
24 | classifier:
25 | type: mlp
26 | params:
27 | in_dim: 300
28 | out_dim: 2
29 | hidden_dim: 768
30 | num_layers: 0
31 |
--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/with_features.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | unimodal_image:
3 | model_data_dir: ${env.data_dir}
4 | direct_features_input: true
5 | modal_encoder:
6 | type: finetune_faster_rcnn_fpn_fc7
7 | params:
8 | in_dim: 2048
9 | bias_file: models/detectron.defaults/fc7_b.pkl
10 | weights_file: models/detectron.defaults/fc7_w.pkl
11 | model_data_dir: ${model_config.unimodal_image.model_data_dir}
12 | num_output_features: 1
13 |
--------------------------------------------------------------------------------
/mmf/configs/models/uniter/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | uniter:
3 | heads:
4 | vqa2:
5 | type: mlp
6 | freeze: false
7 | lr_multiplier: 1.0
8 | in_dim: 768
9 | hidden_size: 1536
10 | num_labels: 3129
11 | pooler_name: bert_pooler
12 | text_embeddings:
13 | type: bert_embeddings
14 | image_embeddings:
15 | type: uniter_image_embeddings
16 | params:
17 | name: 'uniter_image_embeddings'
18 | encoder:
19 | type: transformer
20 | params:
21 | bert_model_name: bert-base-uncased
22 | hidden_size: 768
23 | num_hidden_layers: 12
24 | num_attention_heads: 12
25 | output_attentions: false
26 | output_hidden_states: false
27 | tasks:
28 | - vqa2
29 |
--------------------------------------------------------------------------------
/mmf/configs/models/vilbert/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/vilbert/defaults.yaml
3 |
--------------------------------------------------------------------------------
/mmf/configs/models/vinvl/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | vinvl:
3 | heads:
4 | test:
5 | type: mlp
6 | freeze: false
7 | lr_multiplier: 1.0
8 | in_dim: 768
9 | hidden_size: 1536
10 | num_labels: 3129
11 | pooler_name: bert_pooler
12 | bert_model_name: bert-base-uncased
13 | loss_type: sfmx
14 | img_feature_dim: 2054
15 | img_feature_type: 'frcnn'
16 | use_img_layernorm: 1
17 | img_layer_norm_eps: 1e-12
18 | max_img_seq_len: 70
19 |
--------------------------------------------------------------------------------
/mmf/configs/models/visual_bert/classification.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | visual_bert:
3 | training_head_type: classification
4 |
--------------------------------------------------------------------------------
/mmf/configs/models/visual_bert/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | visual_bert:
3 | bert_model_name: bert-base-uncased
4 | training_head_type: pretraining
5 | visual_embedding_dim: 2048
6 | special_visual_initialize: true
7 | embedding_strategy: plain
8 | bypass_transformer: false
9 | output_attentions: false
10 | output_hidden_states: false
11 | random_initialize: false
12 | freeze_base: false
13 | finetune_lr_multiplier: 1
14 | # Default points to BERT pooler strategy which is to take
15 | # representation of CLS token after passing it through a dense layer
16 | pooler_strategy: default
17 | zerobias: false # Initialize last layer to predict closer to 0 on init for sigmoid outputs
18 |
--------------------------------------------------------------------------------
/mmf/configs/models/visual_bert/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/visual_bert/defaults.yaml
3 |
--------------------------------------------------------------------------------
/mmf/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import processors
3 | from .base_dataset import BaseDataset
4 | from .base_dataset_builder import BaseDatasetBuilder
5 | from .concat_dataset import ConcatDataset
6 | from .lightning_multi_datamodule import LightningMultiDataModule
7 | from .lightning_multi_dataset_loader import LightningMultiDataLoader
8 | from .mmf_dataset import MMFDataset
9 | from .mmf_dataset_builder import MMFDatasetBuilder
10 | from .multi_dataset_loader import MultiDatasetLoader
11 |
12 |
13 | __all__ = [
14 | "processors",
15 | "BaseDataset",
16 | "BaseDatasetBuilder",
17 | "ConcatDataset",
18 | "MultiDatasetLoader",
19 | "MMFDataset",
20 | "MMFDatasetBuilder",
21 | "LightningMultiDataModule",
22 | "LightningMultiDataLoader",
23 | ]
24 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/airstore/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.utils.env import import_files
4 |
5 |
6 | import_files(__file__, "mmf.datasets.builders.airstore")
7 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/airstore/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.common.registry import registry
4 | from mmf.datasets.builders.airstore.dataset import AirstoreDataset
5 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
6 |
7 |
@registry.register_builder("airstore")
class AirstoreDatasetBuilder(MMFDatasetBuilder):
    """Builder for the airstore dataset, registered as ``airstore``."""

    def __init__(
        self, dataset_name="airstore", dataset_class=AirstoreDataset, *args, **kwargs
    ):
        super().__init__(dataset_name)
        # Honor the caller-supplied dataset_class instead of silently
        # discarding it (it was previously hard-coded to AirstoreDataset);
        # the default preserves the original behavior.
        self.dataset_class = dataset_class

    @classmethod
    def config_path(cls):
        """Relative path of the default config for this dataset."""
        return "configs/datasets/airstore/defaults.yaml"
19 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/charades/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/charades/__init__.py
--------------------------------------------------------------------------------
/mmf/datasets/builders/charades/builder.py:
--------------------------------------------------------------------------------
1 | from mmf.common.registry import registry
2 | from mmf.datasets.builders.charades.dataset import CharadesDataset
3 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
4 |
5 |
@registry.register_builder("charades")
class CharadesBuilder(MMFDatasetBuilder):
    """Builder for the Charades video dataset, registered as ``charades``.

    Fix: the ``dataset_class`` parameter (and ``*args``/``**kwargs``) were
    previously ignored — ``super().__init__`` received only ``dataset_name``
    and ``CharadesDataset`` was hard-coded afterwards. They are now forwarded
    to ``MMFDatasetBuilder``, matching the other builders in this package;
    default behavior is unchanged.
    """

    def __init__(
        self, dataset_name="charades", dataset_class=CharadesDataset, *args, **kwargs
    ):
        super().__init__(dataset_name, dataset_class, *args, **kwargs)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/charades/defaults.yaml"
17 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/clevr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/clevr/__init__.py
--------------------------------------------------------------------------------
/mmf/datasets/builders/coco/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
# Public API of the COCO builders package; these names are imported below so
# callers can do e.g. `from mmf.datasets.builders.coco import COCOBuilder`.
__all__ = [
    "COCOBuilder",
    "COCODataset",
    "DetectionCOCOBuilder",
    "DetectionCOCODataset",
    "MaskedCOCOBuilder",
    "MaskedCOCODataset",
]
10 |
11 | from .builder import COCOBuilder
12 | from .dataset import COCODataset
13 | from .detection_builder import DetectionCOCOBuilder
14 | from .detection_dataset import DetectionCOCODataset
15 | from .masked_builder import MaskedCOCOBuilder
16 | from .masked_dataset import MaskedCOCODataset
17 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/coco/detection_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.datasets.builders.coco.detection_dataset import DetectionCOCODataset
4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
5 |
6 |
@registry.register_builder("detection_coco")
class DetectionCOCOBuilder(MMFDatasetBuilder):
    """Builder for the COCO detection dataset.

    Registers under the key ``detection_coco`` and wires
    ``DetectionCOCODataset`` in as the dataset class.
    """

    def __init__(self):
        # Positional form of the standard MMFDatasetBuilder initialization:
        # (dataset_name, dataset_class).
        super().__init__("detection_coco", DetectionCOCODataset)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/coco/detection.yaml"
17 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/coco/masked_builder.py:
--------------------------------------------------------------------------------
1 | from mmf.common.registry import registry
2 | from mmf.datasets.builders.coco.builder import COCOBuilder
3 |
4 | from .masked_dataset import MaskedCOCODataset
5 |
6 |
@registry.register_builder("masked_coco")
class MaskedCOCOBuilder(COCOBuilder):
    """COCO builder variant serving the masked (pretraining) dataset.

    Reuses the COCO builder setup, swapping in ``MaskedCOCODataset`` and the
    ``masked_coco`` registry name.
    """

    def __init__(self):
        super().__init__()
        self.dataset_name = "masked_coco"
        self.set_dataset_class(MaskedCOCODataset)

    def update_registry_for_model(self, config):
        # Publish the masked-token vocabulary size so models can size their
        # text embedding layers from the registry.
        vocab_size = self.dataset.masked_token_processor.get_vocab_size()
        registry.register(f"{self.dataset_name}_text_vocab_size", vocab_size)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/coco/masked.yaml"
23 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/coco2017/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/coco2017/__init__.py
--------------------------------------------------------------------------------
/mmf/datasets/builders/coco2017/masked_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.datasets.builders.coco2017.masked_dataset import MaskedCoco2017Dataset
4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
5 |
6 |
@registry.register_builder("masked_coco2017")
class MaskedCoco2017Builder(MMFDatasetBuilder):
    """Builder for the masked COCO-2017 dataset (``masked_coco2017``).

    Fix: the class was named ``MaskedFlickr30kBuilder`` — a copy-paste
    leftover from the flickr30k builder it was derived from — which clashed
    with the real Flickr30k builder's name. Renamed to match the dataset;
    an alias preserves backward compatibility for any existing imports.
    """

    def __init__(
        self,
        dataset_name="masked_coco2017",
        dataset_class=MaskedCoco2017Dataset,
        *args,
        **kwargs,
    ):
        super().__init__(dataset_name, dataset_class, *args, **kwargs)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/coco2017/masked.yaml"


# Backward-compatible alias for the original (misleading) class name.
MaskedFlickr30kBuilder = MaskedCoco2017Builder
21 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/coco2017/masked_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.common.typings import MMFDatasetConfigType
4 | from mmf.datasets.builders.localized_narratives.masked_dataset import (
5 | MaskedLocalizedNarrativesDatasetMixin,
6 | )
7 | from mmf.datasets.mmf_dataset import MMFDataset
8 |
9 |
class MaskedCoco2017Dataset(MaskedLocalizedNarrativesDatasetMixin, MMFDataset):
    """Masked-modeling dataset over COCO-2017 localized narratives.

    The mixin supplies the masked localized-narratives behavior; MMFDataset
    supplies the generic dataset plumbing.
    """

    def __init__(
        self,
        config: MMFDatasetConfigType,
        dataset_type: str,
        index: int,
        *args,
        **kwargs,
    ):
        # Fixed dataset name is prepended to the usual MMFDataset arguments.
        super().__init__("masked_coco2017", config, dataset_type, index, *args, **kwargs)
22 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/conceptual_captions/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
# Public API of the conceptual_captions builders package; these names are
# imported below for convenient package-level access.
__all__ = [
    "ConceptualCaptionsBuilder",
    "ConceptualCaptionsDataset",
    "MaskedConceptualCaptionsBuilder",
    "MaskedConceptualCaptionsDataset",
]
8 |
9 | from .builder import ConceptualCaptionsBuilder
10 | from .dataset import ConceptualCaptionsDataset
11 | from .masked_builder import MaskedConceptualCaptionsBuilder
12 | from .masked_dataset import MaskedConceptualCaptionsDataset
13 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/conceptual_captions/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.common.registry import registry
4 | from mmf.datasets.builders.coco import COCOBuilder
5 |
6 | from .dataset import ConceptualCaptionsDataset
7 |
8 |
@registry.register_builder("conceptual_captions")
class ConceptualCaptionsBuilder(COCOBuilder):
    """Builder for Conceptual Captions, reusing the COCO builder machinery.

    Only the registry name and the dataset class differ from the COCO setup.
    """

    def __init__(self):
        super().__init__()
        # Rebind name and dataset class after the inherited COCO setup.
        self.dataset_name = "conceptual_captions"
        self.set_dataset_class(ConceptualCaptionsDataset)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/conceptual_captions/defaults.yaml"
19 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/conceptual_captions/masked_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.datasets.builders.coco import MaskedCOCOBuilder
4 |
5 | from .masked_dataset import MaskedConceptualCaptionsDataset
6 |
7 |
@registry.register_builder("masked_conceptual_captions")
class MaskedConceptualCaptionsBuilder(MaskedCOCOBuilder):
    """Masked-pretraining variant of the Conceptual Captions builder.

    Inherits the masked-COCO behavior (including vocab-size registration) and
    swaps in the Conceptual Captions masked dataset.
    """

    def __init__(self):
        super().__init__()
        # Override the name/class set by MaskedCOCOBuilder.
        self.dataset_name = "masked_conceptual_captions"
        self.set_dataset_class(MaskedConceptualCaptionsDataset)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/conceptual_captions/masked.yaml"
18 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/conceptual_captions/masked_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.datasets.builders.coco import MaskedCOCODataset
4 |
5 |
class MaskedConceptualCaptionsDataset(MaskedCOCODataset):
    """Masked Conceptual Captions dataset built on the masked-COCO dataset."""

    def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs):
        super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs)
        self.dataset_name = "masked_conceptual_captions"
        # Each sampling flag defaults to enabled with a 0.5 probability knob;
        # both come from the dataset config when present.
        for flag in ("two_sentence", "false_caption"):
            setattr(self, f"_{flag}", config.get(flag, True))
            setattr(
                self,
                f"_{flag}_probability",
                config.get(f"{flag}_probability", 0.5),
            )
14 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/flickr30k/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/flickr30k/__init__.py
--------------------------------------------------------------------------------
/mmf/datasets/builders/flickr30k/masked_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.datasets.builders.flickr30k.masked_dataset import MaskedFlickr30kDataset
4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
5 |
6 |
@registry.register_builder("masked_flickr30k")
class MaskedFlickr30kBuilder(MMFDatasetBuilder):
    """Builder for the masked Flickr30k dataset (``masked_flickr30k``)."""

    def __init__(self, dataset_name="masked_flickr30k",
                 dataset_class=MaskedFlickr30kDataset, *args, **kwargs):
        # Standard pass-through to the generic MMF dataset builder.
        super().__init__(dataset_name, dataset_class, *args, **kwargs)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/flickr30k/masked.yaml"
21 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/flickr30k/masked_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.common.typings import MMFDatasetConfigType
4 | from mmf.datasets.builders.localized_narratives.masked_dataset import (
5 | MaskedLocalizedNarrativesDatasetMixin,
6 | )
7 | from mmf.datasets.mmf_dataset import MMFDataset
8 |
9 |
class MaskedFlickr30kDataset(MaskedLocalizedNarrativesDatasetMixin, MMFDataset):
    """Masked-modeling dataset over Flickr30k localized narratives.

    The mixin supplies the masked localized-narratives behavior; MMFDataset
    supplies the generic dataset plumbing.
    """

    def __init__(
        self,
        config: MMFDatasetConfigType,
        dataset_type: str,
        index: int,
        *args,
        **kwargs,
    ):
        # Fixed dataset name is prepended to the usual MMFDataset arguments.
        super().__init__("masked_flickr30k", config, dataset_type, index, *args, **kwargs)
22 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/glue/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/gqa/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
# Public API of the GQA builders package; these names are imported below.
__all__ = ["GQABuilder", "GQADataset", "MaskedGQABuilder", "MaskedGQADataset"]
4 |
5 | from .builder import GQABuilder
6 | from .dataset import GQADataset
7 | from .masked_builder import MaskedGQABuilder
8 | from .masked_dataset import MaskedGQADataset
9 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/gqa/masked_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.common.registry import registry
4 | from mmf.datasets.builders.gqa.builder import GQABuilder
5 | from mmf.datasets.builders.gqa.masked_dataset import MaskedGQADataset
6 |
7 |
@registry.register_builder("masked_gqa")
class MaskedGQABuilder(GQABuilder):
    """Masked-pretraining variant of the GQA builder (``masked_gqa``)."""

    def __init__(self):
        super().__init__()
        # Override the name/class set by the parent GQA builder.
        self.dataset_name = "masked_gqa"
        self.dataset_class = MaskedGQADataset

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/gqa/masked.yaml"
18 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/hateful_memes/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/localized_narratives/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/localized_narratives/__init__.py
--------------------------------------------------------------------------------
/mmf/datasets/builders/localized_narratives/masked_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.datasets.builders.localized_narratives.masked_dataset import (
4 | MaskedLocalizedNarrativesDataset,
5 | )
6 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
7 |
8 |
@registry.register_builder("masked_localized_narratives")
class MaskedLocalizedNarrativesBuilder(MMFDatasetBuilder):
    """Builder for the masked Localized Narratives dataset."""

    def __init__(self, dataset_name="masked_localized_narratives",
                 dataset_class=MaskedLocalizedNarrativesDataset, *args, **kwargs):
        # Standard pass-through to the generic MMF dataset builder.
        super().__init__(dataset_name, dataset_class, *args, **kwargs)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/localized_narratives/masked.yaml"
23 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/mmimdb/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/mmimdb/masked_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 | from mmf.common.registry import registry
9 | from mmf.datasets.builders.mmimdb.masked_dataset import MaskedMMImdbDataset
10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder
11 |
12 |
@registry.register_builder("masked_mmimdb")
class MaskedMMImdbBuilder(VQA2Builder):
    """Builder for the masked MM-IMDb dataset, reusing the VQA2 builder."""

    def __init__(self):
        super().__init__()
        # Override the name/class set by the parent VQA2 builder.
        self.dataset_name = "masked_mmimdb"
        self.dataset_class = MaskedMMImdbDataset

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/mmimdb/masked.yaml"
23 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/nlvr2/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/nlvr2/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 | from mmf.common.registry import registry
9 | from mmf.datasets.builders.nlvr2.dataset import NLVR2Dataset
10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder
11 |
12 |
@registry.register_builder("nlvr2")
class NLVR2Builder(VQA2Builder):
    """Builder for the NLVR2 dataset, reusing the VQA2 builder machinery."""

    def __init__(self):
        super().__init__()
        # Override the name/class set by the parent VQA2 builder.
        self.dataset_name = "nlvr2"
        self.dataset_class = NLVR2Dataset

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/nlvr2/defaults.yaml"
23 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/ocrvqa/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/ocrvqa/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import Registry
3 | from mmf.datasets.builders.ocrvqa.dataset import OCRVQADataset
4 | from mmf.datasets.builders.textvqa.builder import TextVQABuilder
5 |
6 |
@Registry.register_builder("ocrvqa")
class OCRVQABuilder(TextVQABuilder):
    """Builder for the OCR-VQA dataset, reusing the TextVQA builder setup."""

    def __init__(self):
        super().__init__()
        # Override the name/class set by the parent TextVQA builder.
        self.dataset_name = "ocrvqa"
        self.set_dataset_class(OCRVQADataset)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/ocrvqa/defaults.yaml"
17 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/ocrvqa/dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.datasets.builders.textvqa.dataset import TextVQADataset
3 |
4 |
class OCRVQADataset(TextVQADataset):
    """OCR-VQA dataset, a thin specialization of the TextVQA dataset."""

    def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs):
        super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs)
        self.dataset_name = "ocrvqa"

    def preprocess_sample_info(self, sample_info):
        # Unlike ST-VQA, OCR-VQA needs no feature-path rewriting; the sample
        # is returned untouched.
        return sample_info
13 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/okvqa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/okvqa/__init__.py
--------------------------------------------------------------------------------
/mmf/datasets/builders/okvqa/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.datasets.builders.okvqa.dataset import OKVQADataset
4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
5 |
6 |
@registry.register_builder("okvqa")
class OKVQABuilder(MMFDatasetBuilder):
    """Builder for the OK-VQA dataset (``okvqa``)."""

    def __init__(self, dataset_name="okvqa", dataset_class=OKVQADataset,
                 *args, **kwargs):
        # Standard pass-through to the generic MMF dataset builder.
        super().__init__(dataset_name, dataset_class, *args, **kwargs)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/okvqa/defaults.yaml"
17 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/retrieval/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
# Public API of the retrieval builders package; these names are imported below.
__all__ = ["RetrievalDataset", "RetrievalBuilder"]
4 |
5 | from .builder import RetrievalBuilder
6 | from .dataset import RetrievalDataset
7 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/retrieval/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 |
9 | from mmf.common.registry import registry
10 | from mmf.datasets.builders.retrieval.dataset import RetrievalDataset
11 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
12 |
13 |
@registry.register_builder("retrieval")
class RetrievalBuilder(MMFDatasetBuilder):
    """Builder for the retrieval dataset (``retrieval``).

    Fix: ``config_path`` previously returned ``"config/datasets/..."``
    (missing the trailing ``s``), which does not match the ``configs/``
    layout used by every other builder in this repository; the stray
    double blank line inside the class body is also removed.
    """

    def __init__(
        self, dataset_name="retrieval", dataset_class=RetrievalDataset, *args, **kwargs
    ):
        super().__init__(dataset_name, dataset_class, *args, **kwargs)

    @classmethod
    def config_path(cls):
        # Corrected to the standard "configs/" prefix.
        return "configs/datasets/retrieval/flickr30k_defaults.yaml"
25 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/sbu_captions/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
# Public API of the SBU captions builders package; names imported below.
__all__ = ["MaskedSBUBuilder", "MaskedSBUDataset"]
4 |
5 | from .masked_builder import MaskedSBUBuilder
6 | from .masked_dataset import MaskedSBUDataset
7 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/sbu_captions/masked_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.common.registry import registry
4 | from mmf.datasets.builders.coco import MaskedCOCOBuilder
5 |
6 | from .masked_dataset import MaskedSBUDataset
7 |
8 |
@registry.register_builder("masked_sbu")
class MaskedSBUBuilder(MaskedCOCOBuilder):
    """Masked SBU-captions builder, reusing the masked-COCO builder setup."""

    def __init__(self):
        super().__init__()
        # Override the name/class set by MaskedCOCOBuilder.
        self.dataset_name = "masked_sbu"
        self.set_dataset_class(MaskedSBUDataset)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/sbu_captions/masked.yaml"
19 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/sbu_captions/masked_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.datasets.builders.coco import MaskedCOCODataset
4 |
5 |
class MaskedSBUDataset(MaskedCOCODataset):
    """Masked SBU-captions dataset built on the masked-COCO dataset."""

    def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs):
        super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs)
        self.dataset_name = "masked_sbu"
        # Each sampling flag defaults to enabled with a 0.5 probability knob;
        # both come from the dataset config when present.
        for flag in ("two_sentence", "false_caption"):
            setattr(self, f"_{flag}", config.get(flag, True))
            setattr(
                self,
                f"_{flag}_probability",
                config.get(f"{flag}_probability", 0.5),
            )
14 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/stvqa/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/stvqa/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import Registry
3 | from mmf.datasets.builders.stvqa.dataset import STVQADataset
4 | from mmf.datasets.builders.textvqa.builder import TextVQABuilder
5 |
6 |
@Registry.register_builder("stvqa")
class STVQABuilder(TextVQABuilder):
    """Builder for the ST-VQA dataset, reusing the TextVQA builder setup."""

    def __init__(self):
        super().__init__()
        # Override the name/class set by the parent TextVQA builder.
        self.dataset_name = "stvqa"
        self.set_dataset_class(STVQADataset)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/stvqa/defaults.yaml"
17 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/stvqa/dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.datasets.builders.textvqa.dataset import TextVQADataset
3 |
4 |
class STVQADataset(TextVQADataset):
    """ST-VQA dataset, a specialization of the TextVQA dataset."""

    def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs):
        super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs)
        self.dataset_name = "stvqa"

    def preprocess_sample_info(self, sample_info):
        # Feature files are stored under a split-specific directory: "train"
        # for train/val, "test_task3" for the test split. Prepend that prefix
        # when the stored path does not already carry it.
        prefix = "test_task3" if self.dataset_type == "test" else "train"
        path = sample_info["feature_path"]
        if not path.startswith(prefix):
            sample_info["feature_path"] = f"{prefix}/{path}"
        return sample_info
22 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/textcaps/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/textvqa/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/vinvl/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
# Public API of the VinVL builders package; these names are imported below.
__all__ = ["VinVLBuilder", "VinVLDataset"]
3 |
4 | from .builder import VinVLBuilder
5 | from .dataset import VinVLDataset
6 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/visual_dialog/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/visual_dialog/dataset.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import json
3 |
4 | import torch
5 | from mmf.common.sample import Sample
6 | from mmf.datasets.builders.visual_dialog.database import VisualDialogDatabase
7 | from mmf.datasets.builders.vqa2 import VQA2Dataset
8 |
9 |
class VisualDialogDataset(VQA2Dataset):
    """Visual Dialog dataset built on the VQA2 dataset plumbing."""

    def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs):
        super().__init__(
            config,
            dataset_type,
            imdb_file_index,
            dataset_name="visual_dialog",
            *args,
            **kwargs,
        )

        # Cache the discriminative-decoding sub-config and related flags.
        disc_cfg = config.discriminative
        self._discriminative = disc_cfg.enabled
        self._return_indices = disc_cfg.return_indices
        self._no_unk = config.no_unk
        self._return_history = config.return_history
26 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/visual_entailment/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/visual_entailment/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 | from mmf.common.registry import registry
9 | from mmf.datasets.builders.visual_entailment.dataset import VisualEntailmentDataset
10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder
11 |
12 |
@registry.register_builder("visual_entailment")
class VisualEntailmentBuilder(VQA2Builder):
    """Builder for the Visual Entailment dataset, reusing the VQA2 builder."""

    def __init__(self):
        super().__init__()
        # Override the name/class set by the parent VQA2 builder.
        self.dataset_name = "visual_entailment"
        self.dataset_class = VisualEntailmentDataset

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/visual_entailment/defaults.yaml"
23 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/visual_genome/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/visual_genome/detection_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.datasets.builders.visual_genome.detection_dataset import (
4 | DetectionVisualGenomeDataset,
5 | )
6 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
7 |
8 |
@registry.register_builder("detection_visual_genome")
class DetectionVisualGenomeBuilder(MMFDatasetBuilder):
    """Builder for the Visual Genome detection dataset."""

    def __init__(self):
        # Positional form of the standard MMFDatasetBuilder initialization:
        # (dataset_name, dataset_class).
        super().__init__("detection_visual_genome", DetectionVisualGenomeDataset)

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/visual_genome/detection.yaml"
20 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/visual_genome/detection_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.datasets.builders.coco.detection_dataset import DetectionCOCODataset
3 |
4 |
class DetectionVisualGenomeDataset(DetectionCOCODataset):
    """Visual Genome detection dataset built on the COCO detection dataset."""

    def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs):
        super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs)
        # Honor an explicit "name" kwarg first, then "dataset_name", and fall
        # back to the canonical default when neither is supplied.
        if "name" in kwargs:
            self.dataset_name = kwargs["name"]
        else:
            self.dataset_name = kwargs.get("dataset_name", "detection_visual_genome")
15 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/visual_genome/masked_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.common.registry import registry
4 | from mmf.datasets.builders.visual_genome.builder import VisualGenomeBuilder
5 | from mmf.datasets.builders.visual_genome.masked_dataset import MaskedVisualGenomeDataset
6 |
7 |
@registry.register_builder("masked_visual_genome")
class MaskedVisualGenomeBuilder(VisualGenomeBuilder):
    """Masked-pretraining variant of the Visual Genome builder."""

    def __init__(self):
        super().__init__()
        # Override the name/class set by the parent Visual Genome builder.
        self.dataset_name = "masked_visual_genome"
        self.dataset_class = MaskedVisualGenomeDataset

    @classmethod
    def config_path(cls):
        # Default YAML configuration shipped with the repository.
        return "configs/datasets/visual_genome/masked.yaml"
18 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/vizwiz/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from .builder import VizWizBuilder
3 | from .dataset import VizWizDataset
4 |
5 |
6 | __all__ = ["VizWizBuilder", "VizWizDataset"]
7 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/vizwiz/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.datasets.builders.vizwiz.dataset import VizWizDataset
4 | from mmf.datasets.builders.vqa2 import VQA2Builder
5 |
6 |
@registry.register_builder("vizwiz")
class VizWizBuilder(VQA2Builder):
    """Builder for the VizWiz dataset, reusing the VQA2 builder machinery."""

    def __init__(self):
        super().__init__()
        self.set_dataset_class(VizWizDataset)
        self.dataset_name = "vizwiz"

    @classmethod
    def config_path(cls):
        # Path is relative to the mmf package root.
        return "configs/datasets/vizwiz/defaults.yaml"

    def update_registry_for_model(self, config):
        # Pure pass-through to the parent implementation; kept so callers
        # invoking it on this builder explicitly keep working.
        super().update_registry_for_model(config)
20 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/vqa2/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | __all__ = ["VQA2Builder", "VQA2Dataset"]
3 |
4 | from .builder import VQA2Builder
5 | from .dataset import VQA2Dataset
6 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/vqa2/masked_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 | from mmf.common.registry import registry
9 | from mmf.datasets.builders.vqa2.builder import VQA2Builder
10 | from mmf.datasets.builders.vqa2.masked_dataset import MaskedVQA2Dataset
11 |
12 |
@registry.register_builder("masked_vqa2")
class MaskedVQA2Builder(VQA2Builder):
    """Builder for the masked (pretraining) variant of VQA2."""

    def __init__(self):
        super().__init__()
        self.dataset_class = MaskedVQA2Dataset
        self.dataset_name = "masked_vqa2"

    @classmethod
    def config_path(cls):
        # Path is relative to the mmf package root.
        return "configs/datasets/vqa2/masked.yaml"
23 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/vqa2/masked_q_vqa2_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 |
9 | import os
10 | import warnings
11 |
12 | from mmf.common.registry import registry
13 | from mmf.datasets.builders.vqa2.builder import VQA2Builder
14 | from mmf.datasets.builders.vqa2.masked_q_vqa2_dataset import MaskedQVQA2Dataset
15 | from mmf.datasets.concat_dataset import MMFConcatDataset
16 |
17 |
@registry.register_builder("masked_q_vqa2")
class MaskedQVQA2Builder(VQA2Builder):
    """Builder for the masked-question variant of VQA2."""

    def __init__(self):
        super().__init__()
        self.dataset_class = MaskedQVQA2Dataset
        self.dataset_name = "masked_q_vqa2"

    @classmethod
    def config_path(cls):
        # Path is relative to the mmf package root.
        return "configs/datasets/vqa2/masked_q.yaml"
28 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/vqa2/ocr_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import Registry
3 | from mmf.datasets.builders.vizwiz import VizWizBuilder
4 | from mmf.datasets.builders.vqa2.ocr_dataset import VQA2OCRDataset
5 |
6 |
@Registry.register_builder("vqa2_ocr")
class TextVQABuilder(VizWizBuilder):
    """Builder for the OCR variant of VQA2, built on top of VizWiz.

    NOTE(review): registered as "vqa2_ocr" while the class is named
    ``TextVQABuilder`` and the dataset name is the upper-case "VQA2_OCR",
    unlike the lower-case names of sibling builders — confirm intentional.
    The registry keys are behavior, so they are left unchanged here.
    """

    def __init__(self):
        super().__init__()
        self.dataset_name = "VQA2_OCR"
        self.set_dataset_class(VQA2OCRDataset)

    @classmethod
    def config_path(cls):
        # Bug fix: a classmethod's first parameter is the class and must be
        # named ``cls`` (it was previously misleadingly named ``self``).
        # No default config ships for this builder.
        return None
17 |
--------------------------------------------------------------------------------
/mmf/datasets/builders/vqacp_v2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/vqacp_v2/__init__.py
--------------------------------------------------------------------------------
/mmf/datasets/builders/vqacp_v2/builder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.datasets.builders.vqacp_v2.dataset import VQACPv2Dataset
4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder
5 |
6 |
@registry.register_builder("vqacp_v2")
class VQACPv2Builder(MMFDatasetBuilder):
    """Builder for the VQA-CP v2 dataset."""

    def __init__(
        self, dataset_name="vqacp_v2", dataset_class=VQACPv2Dataset, *args, **kwargs
    ):
        # Defaults make the builder constructible with no arguments while
        # still letting subclasses override the name/class.
        super().__init__(dataset_name, dataset_class, *args, **kwargs)

    @classmethod
    def config_path(cls):
        # Path is relative to the mmf package root.
        return "configs/datasets/vqacp_v2/defaults.yaml"
17 |
--------------------------------------------------------------------------------
/mmf/datasets/databases/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import mmf.datasets.databases.readers # noqa
3 |
4 | from .annotation_database import AnnotationDatabase
5 | from .features_database import FeaturesDatabase
6 | from .image_database import ImageDatabase
7 | from .scene_graph_database import SceneGraphDatabase
8 |
9 |
10 | __all__ = [
11 | "AnnotationDatabase",
12 | "FeaturesDatabase",
13 | "ImageDatabase",
14 | "SceneGraphDatabase",
15 | ]
16 |
--------------------------------------------------------------------------------
/mmf/datasets/databases/readers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/datasets/databases/scene_graph_database.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.datasets.databases.annotation_database import AnnotationDatabase
3 |
4 |
class SceneGraphDatabase(AnnotationDatabase):
    """Annotation database indexed by image id instead of by position.

    The items loaded by ``AnnotationDatabase`` are re-indexed into a dict
    keyed on ``item["image_id"]``; ``__getitem__`` then looks up by image id.
    """

    def __init__(self, config, scene_graph_path, *args, **kwargs):
        super().__init__(config, scene_graph_path, *args, **kwargs)
        # Re-index loaded annotations by image id for direct lookup.
        self.data_dict = {entry["image_id"]: entry for entry in self.data}

    def __getitem__(self, idx):
        # ``idx`` is an image id here, not a positional index.
        return self.data_dict[idx]
14 |
--------------------------------------------------------------------------------
/mmf/datasets/subset_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from torch.utils.data.dataset import Subset
4 |
5 |
class MMFSubset(Subset):
    """A torch ``Subset`` that proxies unknown attributes to the wrapped dataset."""

    def __init__(self, dataset, indices):
        super().__init__(dataset, indices)
        # Snapshot of this object's attribute names at construction time;
        # __getattr__ consults it before falling back to the dataset.
        self._dir_representation = dir(self)

    def __getattr__(self, name):
        # Only invoked when normal attribute lookup fails. The __dict__
        # membership checks guard against recursion before __init__ has
        # finished populating the instance.
        own = self.__dict__
        if "_dir_representation" in own and name in self._dir_representation:
            return getattr(self, name)
        if "dataset" in own and hasattr(self.dataset, name):
            return getattr(self.dataset, name)
        raise AttributeError(name)
18 |
--------------------------------------------------------------------------------
/mmf/models/albef/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import mmf.models.albef.vit # noqa
3 |
--------------------------------------------------------------------------------
/mmf/models/interfaces/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/models/m4c_captioner.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry
3 | from mmf.models.m4c import M4C
4 |
5 |
@registry.register_model("m4c_captioner")
class M4CCaptioner(M4C):
    """M4C adapted for captioning; can suppress the UNK token during decoding."""

    def __init__(self, config):
        super().__init__(config)
        # When true, the UNK logit is masked out in _forward_output below.
        self.remove_unk_in_pred = self.config.remove_unk_in_pred

    @classmethod
    def config_path(cls):
        return "configs/models/m4c_captioner/defaults.yaml"

    def _forward_output(self, sample_list, fwd_results):
        super()._forward_output(sample_list, fwd_results)
        if self.remove_unk_in_pred:
            # Push the UNK logit to a large negative value so it is never
            # selected in the generated captions.
            fwd_results["scores"][..., self.answer_processor.UNK_IDX] = -1e10
        return fwd_results
24 |
--------------------------------------------------------------------------------
/mmf/models/transformers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import mmf.models.transformers.backends # noqa
4 | from mmf.models.transformers.base import ( # noqa
5 | BaseTransformer,
6 | BaseTransformerBackend,
7 | BaseTransformerBackendConfig,
8 | BaseTransformerHead,
9 | BaseTransformerInput,
10 | BaseTransformerModalityConfig,
11 | )
12 |
--------------------------------------------------------------------------------
/mmf/models/transformers/backends/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.utils.env import import_files
4 |
5 |
6 | import_files(__file__, "mmf.models.transformers.backends")
7 |
--------------------------------------------------------------------------------
/mmf/models/transformers/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.utils.env import import_files
4 |
5 |
6 | import_files(__file__, "mmf.models.transformers.heads")
7 |
--------------------------------------------------------------------------------
/mmf/models/unit/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | __all__ = ["UniT"]
3 |
4 | from .unit import UniT
5 |
--------------------------------------------------------------------------------
/mmf/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import mmf.modules.losses # noqa
3 | import mmf.modules.metrics # noqa
4 | import mmf.modules.optimizers # noqa
5 | import mmf.modules.schedulers # noqa
6 |
--------------------------------------------------------------------------------
/mmf/projects:
--------------------------------------------------------------------------------
1 | ../projects
--------------------------------------------------------------------------------
/mmf/trainers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | __all__ = ["BaseTrainer"]
3 |
4 | from .base_trainer import BaseTrainer
5 |
--------------------------------------------------------------------------------
/mmf/trainers/callbacks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/trainers/callbacks/lr_scheduler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.trainers.callbacks.base import Callback
4 | from mmf.utils.build import build_scheduler
5 |
6 |
class LRSchedulerCallback(Callback):
    """Callback that steps an LR scheduler after every batch update."""

    def __init__(self, config, trainer):
        """
        Args:
            config (mmf_typings.DictConfig): Config for the callback.
            trainer (Type[BaseTrainer]): Trainer object.
        """
        super().__init__(config, trainer)

        # Build a scheduler only when explicitly enabled in the training
        # config; otherwise on_update_end is a no-op.
        enabled = self.training_config.lr_scheduler is True
        self._scheduler = (
            build_scheduler(trainer.optimizer, self.config) if enabled else None
        )

    def on_update_end(self, **kwargs):
        if self._scheduler is None:
            return
        self._scheduler.step()
27 |
--------------------------------------------------------------------------------
/mmf/trainers/core/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/trainers/core/profiling.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import logging
4 | import threading
5 | from abc import ABC
6 | from typing import Type
7 |
8 | from mmf.utils.timer import Timer
9 |
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
class TrainerProfilingMixin(ABC):
    """Mixin adding lightweight wall-clock profiling to a trainer.

    ``profile`` logs the time elapsed on ``self.profiler`` and resets it;
    it is a no-op unless the training config's ``logger_level`` is "debug".
    """

    profiler: Type[Timer] = Timer()

    def profile(self, text: str) -> None:
        if self.training_config.logger_level != "debug":
            return
        # Bug fix: use the module-level named logger rather than the root
        # logger (``logging.debug``), so messages carry this module's name
        # and respect handlers/levels configured for it.
        logger.debug(
            f"tid={threading.current_thread().ident}, "
            f"{text}: {self.profiler.get_time_since_start()}"
        )
        self.profiler.reset()
24 |
--------------------------------------------------------------------------------
/mmf/trainers/lightning_core/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/utils/features/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf/utils/file_io.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from iopath.common.file_io import PathManager as pm
4 |
5 |
6 | PathManager = pm()
7 |
8 | try:
9 | # [FB only] register internal file IO handlers
10 | from mmf.utils.fb.file_io_handlers import register_handlers
11 |
12 | register_handlers(PathManager)
13 | except ImportError:
14 | pass
15 |
--------------------------------------------------------------------------------
/mmf/utils/phoc/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from .build_phoc import build_phoc # NoQA
4 |
--------------------------------------------------------------------------------
/mmf/utils/phoc/build_phoc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .cphoc import build_phoc as _build_phoc_raw
4 |
5 |
6 | _alphabet = {
7 | "a",
8 | "b",
9 | "c",
10 | "d",
11 | "e",
12 | "f",
13 | "g",
14 | "h",
15 | "i",
16 | "j",
17 | "k",
18 | "l",
19 | "m",
20 | "n",
21 | "o",
22 | "p",
23 | "q",
24 | "r",
25 | "s",
26 | "t",
27 | "u",
28 | "v",
29 | "w",
30 | "x",
31 | "y",
32 | "z",
33 | "0",
34 | "1",
35 | "2",
36 | "3",
37 | "4",
38 | "5",
39 | "6",
40 | "7",
41 | "8",
42 | "9",
43 | } # NoQA
44 |
45 |
def build_phoc(token):
    """Compute the PHOC descriptor of ``token`` as a float32 numpy array.

    The token is lower-cased, stripped, and filtered down to the characters
    in ``_alphabet`` before being handed to the C implementation.
    """
    cleaned = "".join(ch for ch in token.lower().strip() if ch in _alphabet)
    return np.array(_build_phoc_raw(cleaned), dtype=np.float32)
52 |
--------------------------------------------------------------------------------
/mmf/utils/torchscript.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from typing import Dict, Optional
4 |
5 | from torch import Tensor
6 |
7 |
def getattr_torchscriptable(
    dictionary: Dict[str, Tensor], key: str, default: Optional[Tensor] = None
) -> Optional[Tensor]:
    """TorchScript-compatible dict lookup returning ``default`` on a miss."""
    if key not in dictionary:
        return default
    return dictionary[key]
15 |
--------------------------------------------------------------------------------
/mmf/utils/transform.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from torch import Tensor
4 |
5 |
def transform_to_batch_sequence(tensor: Tensor) -> Tensor:
    """Flatten a 3D (batch, seq, dim) tensor to 2D (batch * seq, dim).

    A 2D input is returned unchanged; any other rank is rejected.
    """
    if tensor.dim() == 2:
        return tensor
    assert tensor.dim() == 3
    return tensor.contiguous().view(-1, tensor.size(-1))
12 |
13 |
def transform_to_batch_sequence_dim(tensor: Tensor) -> Tensor:
    """Flatten a 4D (batch, seq, n, dim) tensor to 3D (batch * seq, n, dim).

    A 3D input is returned unchanged; any other rank is rejected.
    """
    if tensor.dim() == 3:
        return tensor
    assert tensor.dim() == 4
    return tensor.contiguous().view(-1, tensor.size(-2), tensor.size(-1))
20 |
--------------------------------------------------------------------------------
/mmf/version.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import sys
4 |
5 |
# Package version string; consumed by packaging and runtime introspection.
__version__ = "1.0.0rc12"

msg = "MMF is only compatible with Python 3.6 and newer."


# Fail fast at import time on unsupported interpreters.
if sys.version_info < (3, 6):
    raise ImportError(msg)
13 |
--------------------------------------------------------------------------------
/mmf_cli/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/mmf_cli/predict.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3 -u
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 |
4 | import sys
5 |
6 | from mmf_cli.run import run
7 |
8 |
def predict(opts=None):
    """Run MMF in prediction mode.

    Args:
        opts: Optional list of config override strings. When ``None``, the
            prediction override is appended to ``sys.argv`` instead.
    """
    if opts is None:
        sys.argv.extend(["evaluation.predict=true"])
    else:
        opts.extend(["evaluation.predict=true"])

    # Bug fix: forward ``opts`` to ``run``. Previously caller-supplied opts
    # were mutated above but never passed on, so they were silently ignored.
    # With opts=None this is equivalent to the old call.
    run(opts, predict=True)
16 |
17 |
18 | if __name__ == "__main__":
19 | predict()
20 |
--------------------------------------------------------------------------------
/mmf_cli/torchx_entryscript.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | """
3 | Entrypoint script used by TorchX to start the training run in each process
4 | """
5 |
6 | from mmf_cli.fb_run import fb_scheduler_run
7 |
8 |
if __name__ == "__main__":
    # Invoked by TorchX as the script entrypoint in each worker process.
    fb_scheduler_run()
11 |
--------------------------------------------------------------------------------
/projects/ban/README.md:
--------------------------------------------------------------------------------
1 | # BAN
2 |
3 | This repository contains the code for BAN model. Please cite the following paper if you are using BAN model from mmf:
4 |
 5 | * Kim, J. H., Jun, J., & Zhang, B. T. (2018). *Bilinear attention networks*. In Advances in Neural Information Processing Systems (pp. 1564-1574). ([arXiv](https://arxiv.org/abs/1805.07932))
6 | ```
7 | @inproceedings{kim2018bilinear,
8 | title={Bilinear attention networks},
9 | author={Kim, Jin-Hwa and Jun, Jaehyun and Zhang, Byoung-Tak},
10 | booktitle={Advances in Neural Information Processing Systems},
11 | pages={1564--1574},
12 | year={2018}
13 | }
14 | ```
15 |
16 | ## Installation
17 |
18 | Follow installation instructions in the [documentation](https://mmf.readthedocs.io/en/latest/notes/installation.html).
19 |
20 | ## Training
21 | To train BAN model on the VQA2 dataset, run the following command
22 | ```
23 | mmf_run config=projects/ban/configs/vqa2/defaults.yaml run_type=train_val dataset=vqa2 model=ban
24 | ```
25 |
--------------------------------------------------------------------------------
/projects/ban/configs/textvqa/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../../../mmf/configs/textvqa/defaults.yaml
3 |
4 | evaluation:
5 | metrics:
6 | - vqa_accuracy
7 |
8 | training:
9 | early_stop:
10 | criteria: textvqa/vqa_accuracy
11 | minimize: false
12 |
--------------------------------------------------------------------------------
/projects/ban/configs/vizwiz/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../../../mmf/configs/vizwiz/defaults.yaml
3 |
4 | evaluation:
5 | metrics:
6 | - vqa_accuracy
7 |
8 | training:
9 | early_stop:
10 | criteria: vizwiz/vqa_accuracy
11 | minimize: false
12 |
--------------------------------------------------------------------------------
/projects/ban/configs/vqa2/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../../../mmf/configs/vqa2/defaults.yaml
3 |
4 | evaluation:
5 | metrics:
6 | - vqa_accuracy
7 |
8 | training:
9 | early_stop:
10 | criteria: vqa2/vqa_accuracy
11 | minimize: false
12 |
--------------------------------------------------------------------------------
/projects/butd/README.md:
--------------------------------------------------------------------------------
1 | # BUTD
2 |
3 | This repository contains the code for pytorch implementation of BUTD model, released originally under this ([repo](https://github.com/peteanderson80/bottom-up-attention)). Please cite the following paper if you are using BUTD model from mmf:
4 |
 5 | * Anderson, P., He, X., Buehler, C., Teney, D., Johnson, M., Gould, S., & Zhang, L. (2018). *Bottom-up and top-down attention for image captioning and visual question answering*. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6077-6086). ([arXiv](https://arxiv.org/abs/1707.07998))
6 | ```
7 | @inproceedings{Anderson2017up-down,
8 | author = {Peter Anderson and Xiaodong He and Chris Buehler and Damien Teney and Mark Johnson and Stephen Gould and Lei Zhang},
9 | title = {Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering},
10 | booktitle={CVPR},
11 | year = {2018}
12 | }
13 | ```
14 |
15 | Please see [https://mmf.sh/docs/projects/butd](https://mmf.sh/docs/projects/butd) for more details on how to use BUTD model.
16 |
--------------------------------------------------------------------------------
/projects/butd/configs/coco/beam_search.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | butd:
6 | inference:
7 | type: beam_search
8 | params:
9 | beam_length: 5
10 |
11 | training:
12 | batch_size: 1
13 |
--------------------------------------------------------------------------------
/projects/butd/configs/coco/defaults.yaml:
--------------------------------------------------------------------------------
1 | optimizer:
2 | type: Adamax
3 | params:
4 | eps: 1.0e-08
5 | lr: 0.01
6 | weight_decay: 0
7 |
8 | evaluation:
9 | metrics:
10 | - caption_bleu4
11 |
12 | training:
13 | clip_norm_mode: all
14 | clip_gradients: true
15 | lr_ratio: 0.1
16 | lr_scheduler: true
17 | lr_steps:
18 | - 15000
19 | - 25000
20 | - 35000
21 | - 45000
22 | max_grad_l2_norm: 0.25
23 | max_updates: 50000
24 | use_warmup: true
25 | warmup_factor: 0.2
26 | warmup_iterations: 1000
27 | batch_size: 256
28 | num_workers: 7
29 | task_size_proportional_sampling: true
30 | early_stop:
31 | criteria: coco/caption_bleu4
32 | minimize: false
33 |
--------------------------------------------------------------------------------
/projects/butd/configs/coco/nucleus_sampling.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | butd:
6 | inference:
7 | type: nucleus_sampling
8 | params:
9 | sum_threshold: 0.8
10 |
11 | training:
12 | batch_size: 1
13 |
--------------------------------------------------------------------------------
/projects/butd/configs/conceptual_captions/beam_search.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | butd:
6 | inference:
 7 |       type: beam_search
 8 |       params:
 9 |         beam_length: 5
10 |
11 | training:
12 | batch_size: 1
13 |
--------------------------------------------------------------------------------
/projects/butd/configs/conceptual_captions/defaults.yaml:
--------------------------------------------------------------------------------
1 | optimizer:
2 | type: Adamax
3 | params:
4 | eps: 1.0e-08
5 | lr: 0.01
6 | weight_decay: 0
7 |
8 | evaluation:
9 | metrics:
10 | - caption_bleu4
11 |
12 | training:
13 | clip_norm_mode: all
14 | clip_gradients: true
15 | lr_ratio: 0.1
16 | lr_scheduler: true
17 | lr_steps:
18 | - 15000
19 | - 25000
20 | - 35000
21 | - 45000
22 | max_grad_l2_norm: 0.25
23 | max_updates: 50000
24 | use_warmup: true
25 | warmup_factor: 0.2
26 | warmup_iterations: 1000
27 | batch_size: 256
28 | num_workers: 7
29 | task_size_proportional_sampling: true
30 | early_stop:
31 | criteria: conceptual_captions/caption_bleu4
32 | minimize: false
33 |
--------------------------------------------------------------------------------
/projects/butd/configs/conceptual_captions/nucleus_sampling.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | butd:
6 | inference:
7 | type: nucleus_sampling
8 | params:
9 | sum_threshold: 0.8
10 |
11 | training:
12 | batch_size: 1
13 |
--------------------------------------------------------------------------------
/projects/butd/configs/textcaps/beam_search.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | textcaps:
6 | zoo_requirements:
7 | - textvqa.defaults
8 | - textcaps.defaults
9 | annotations:
10 | val:
11 | - textcaps/defaults/annotations/imdb_val_filtered_by_image_id.npy
12 |
13 | model_config:
14 | butd: &butd
15 | inference:
16 | type: beam_search
17 | params:
18 | beam_length: 5
19 |
20 | training:
21 | batch_size: 1
22 |
--------------------------------------------------------------------------------
/projects/butd/configs/textcaps/eval_pretrained_coco_model.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./beam_search.yaml
3 |
4 | dataset_config:
5 | textcaps:
6 | processors:
7 | text_processor:
8 | params:
9 | vocab:
10 | vocab_file: textcaps/defaults/extras/vocabs/coco_vocabulary_captioning_thresh5.txt
11 | caption_processor:
12 | params:
13 | vocab:
14 | vocab_file: textcaps/defaults/extras/vocabs/coco_vocabulary_captioning_thresh5.txt
15 |
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/concat_bert/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../others/concat_bert/hateful_memes/defaults.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/concat_bow/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../others/concat_bow/hateful_memes/defaults.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/late_fusion/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../others/late_fusion/hateful_memes/defaults.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/mmbt/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../mmbt/configs/hateful_memes/defaults.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/mmbt/with_features.yaml:
--------------------------------------------------------------------------------
1 | ../../../mmbt/configs/hateful_memes/with_features.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/mmf_transformer/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../../../mmf_transformer/configs/hateful_memes/defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/unimodal/bert.yaml:
--------------------------------------------------------------------------------
1 | ../../../others/unimodal/configs/hateful_memes/bert.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/unimodal/image.yaml:
--------------------------------------------------------------------------------
1 | ../../../others/unimodal/configs/hateful_memes/image.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/unimodal/text.yaml:
--------------------------------------------------------------------------------
1 | ../../../others/unimodal/configs/hateful_memes/text.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/unimodal/with_features.yaml:
--------------------------------------------------------------------------------
1 | ../../../others/unimodal/configs/hateful_memes/with_features.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/vilbert/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../vilbert/configs/hateful_memes/defaults.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/vilbert/direct.yaml:
--------------------------------------------------------------------------------
1 | ../../../vilbert/configs/hateful_memes/direct.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/vilbert/from_cc.yaml:
--------------------------------------------------------------------------------
1 | ../../../vilbert/configs/hateful_memes/from_cc.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/visual_bert/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../visual_bert/configs/hateful_memes/defaults.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/visual_bert/direct.yaml:
--------------------------------------------------------------------------------
1 | ../../../visual_bert/configs/hateful_memes/direct.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/configs/visual_bert/from_coco.yaml:
--------------------------------------------------------------------------------
1 | ../../../visual_bert/configs/hateful_memes/from_coco.yaml
--------------------------------------------------------------------------------
/projects/hateful_memes/fine_grained/configs/visual_bert/attack_vectors.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./multilabel.yaml
3 | - configs/datasets/hateful_memes/fine_grained/attack_vectors.yaml
4 |
5 | model_config:
6 | visual_bert:
7 | num_labels: 9
8 |
--------------------------------------------------------------------------------
/projects/hateful_memes/fine_grained/configs/visual_bert/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../../../configs/visual_bert/defaults.yaml
3 | - configs/datasets/hateful_memes/fine_grained/with_features.yaml
4 |
5 | training:
6 | find_unused_parameters: false
7 | batch_size: 128
8 | max_updates: 10000
9 |
--------------------------------------------------------------------------------
/projects/hateful_memes/fine_grained/configs/visual_bert/hateful_pc_attack.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./multilabel.yaml
3 | - configs/datasets/hateful_memes/fine_grained/hateful_pc_attack.yaml
4 |
5 | model_config:
6 | visual_bert:
7 | num_labels: 17
8 |
--------------------------------------------------------------------------------
/projects/hateful_memes/fine_grained/configs/visual_bert/multilabel.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | visual_bert:
6 | training_head_type: classification
7 | num_labels: 9
8 | losses:
9 | - type: logit_bce
10 |
11 | evaluation:
12 | metrics:
13 | - accuracy
14 | - multilabel_macro_f1
15 | - multilabel_micro_f1
16 |
17 | training:
18 | find_unused_parameters: false
19 | early_stop:
20 | criteria: hateful_memes/multilabel_micro_f1
21 | minimize: false
22 |
--------------------------------------------------------------------------------
/projects/hateful_memes/fine_grained/configs/visual_bert/pc_attack.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./multilabel.yaml
3 | - configs/datasets/hateful_memes/fine_grained/pc_attack.yaml
4 |
5 | model_config:
6 | visual_bert:
7 | num_labels: 15
8 |
--------------------------------------------------------------------------------
/projects/hateful_memes/fine_grained/configs/visual_bert/protected_groups.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./multilabel.yaml
3 | - configs/datasets/hateful_memes/fine_grained/protected_groups.yaml
4 |
5 | model_config:
6 | visual_bert:
7 | num_labels: 7
8 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/conceptnet_only.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | krisp:
6 | graph_module:
7 | kg_path: okvqa/defaults/annotations/annotations/graphs/cn_graph.pth.tar
8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_cn.pkl
9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_cn.pth.tar
10 | dataset_config:
11 | okvqa:
12 | processors:
13 | answer_processor:
14 | params:
15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_cn.pth.tar
16 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/dbpedia_only.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | krisp:
6 | graph_module:
7 | kg_path: okvqa/defaults/annotations/annotations/graphs/db_graph.pth.tar
8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_db.pkl
9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_db.pth.tar
10 | dataset_config:
11 | okvqa:
12 | processors:
13 | answer_processor:
14 | params:
15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_db.pth.tar
16 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/haspart_only.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | krisp:
6 | graph_module:
7 | kg_path: okvqa/defaults/annotations/annotations/graphs/hp_graph.pth.tar
8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_hp.pkl
9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_hp.pth.tar
10 | dataset_config:
11 | okvqa:
12 | processors:
13 | answer_processor:
14 | params:
15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_hp.pth.tar
16 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/okvqav10.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | krisp:
6 | graph_module:
7 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt
8 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10.pth.tar
9 | okvqa_v_mode: "v1.0-121"
10 | old_graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab.pth.tar
11 | ans_translation_file: okvqa/defaults/annotations/annotations/ans_vocab_tx.pth.tar
12 |       num_labels: 2253
14 | dataset_config:
15 | okvqa:
16 | processors:
17 | answer_processor:
18 | params:
19 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt
20 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10.pth.tar
21 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/okvqav10_fromfullpretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | krisp:
6 | graph_module:
7 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt
8 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10_fp.pth.tar
9 | okvqa_v_mode: "v1.0-121-mc"
10 | old_graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab.pth.tar
11 | ans_translation_file: okvqa/defaults/annotations/annotations/ans_vocab_tx.pth.tar
12 |       num_labels: 2253
14 | dataset_config:
15 | okvqa:
16 | processors:
17 | answer_processor:
18 | params:
19 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt
20 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10_fp.pth.tar
21 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/randomgraph.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | krisp:
6 | graph_module:
7 | kg_path: okvqa/defaults/annotations/annotations/graphs/random_graph.pth.tar
8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_random.pkl
9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_random.pth.tar
10 | dataset_config:
11 | okvqa:
12 | processors:
13 | answer_processor:
14 | params:
15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_random.pth.tar
16 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy
9 | val:
10 | - okvqa/defaults/annotations/annotations/imdb_test.npy
11 | test:
12 | - okvqa/defaults/annotations/annotations/imdb_test.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/train_val_cnonly.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./conceptnet_only.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy
9 | val:
10 | - okvqa/defaults/annotations/annotations/imdb_test.npy
11 | test:
12 | - okvqa/defaults/annotations/annotations/imdb_test.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/train_val_dbonly.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./dbpedia_only.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy
9 | val:
10 | - okvqa/defaults/annotations/annotations/imdb_test.npy
11 | test:
12 | - okvqa/defaults/annotations/annotations/imdb_test.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/train_val_hponly.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./haspart_only.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy
9 | val:
10 | - okvqa/defaults/annotations/annotations/imdb_test.npy
11 | test:
12 | - okvqa/defaults/annotations/annotations/imdb_test.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/train_val_okvqav10.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./okvqav10.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 | - okvqa/defaults/annotations/annotations/imdb_trainval_v10.npy
9 | val:
10 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy
11 | test:
12 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/train_val_okvqav10_fromfullpretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./okvqav10_fromfullpretrain.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 | - okvqa/defaults/annotations/annotations/imdb_trainval_v10.npy
9 | val:
10 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy
11 | test:
12 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/train_val_random.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./randomgraph.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy
9 | val:
10 | - okvqa/defaults/annotations/annotations/imdb_test.npy
11 | test:
12 | - okvqa/defaults/annotations/annotations/imdb_test.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/train_val_vgonly.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./visualgenome_only.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy
9 | val:
10 | - okvqa/defaults/annotations/annotations/imdb_test.npy
11 | test:
12 | - okvqa/defaults/annotations/annotations/imdb_test.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/configs/krisp/okvqa/visualgenome_only.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | krisp:
6 | graph_module:
7 |       kg_path: okvqa/defaults/annotations/annotations/graphs/vg_graph.pth.tar
8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_vg.pkl
9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_vg.pth.tar
10 | dataset_config:
11 | okvqa:
12 | processors:
13 | answer_processor:
14 | params:
15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_vg.pth.tar
16 |
--------------------------------------------------------------------------------
/projects/krisp/configs/visual_bert/masked_coco/okvqa_safe.yaml:
--------------------------------------------------------------------------------
1 | optimizer:
2 | type: adam_w
3 | params:
4 | lr: 5e-5
5 | eps: 1e-8
6 |
7 | scheduler:
8 | type: warmup_cosine
9 | params:
10 | num_warmup_steps: 2000
11 | num_training_steps: 88000
12 |
13 | dataset_config:
14 | masked_coco:
15 | return_features_info: true
16 | features:
17 | train:
18 | - okvqa/defaults/features/features_fc6/COCO_trainval2014.lmdb
19 | val:
20 | - okvqa/defaults/features/features_fc6/COCO_trainval2014.lmdb
21 | test:
22 | - okvqa/defaults/features/features_fc6/COCO_trainval2014.lmdb
23 |
24 | training:
25 | find_unused_parameters: true
26 | batch_size: 56
27 | lr_scheduler: true
28 | # Don't forget to update schedule_attributes if you update this
29 | max_updates: 88000
30 |
--------------------------------------------------------------------------------
/projects/krisp/configs/visual_bert/okvqa/defaults_v10.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | visual_bert:
6 | num_labels: 2253
7 |
8 | dataset_config:
9 |   okvqa:
10 |     processors:
11 |       answer_processor:
12 |         params:
13 |           vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt
14 |
--------------------------------------------------------------------------------
/projects/krisp/configs/visual_bert/okvqa/train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy
9 | val:
10 | - okvqa/defaults/annotations/annotations/imdb_test.npy
11 | test:
12 | - okvqa/defaults/annotations/annotations/imdb_test.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/configs/visual_bert/okvqa/train_val_okvqav10.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults_v10.yaml
3 |
4 | dataset_config:
5 | okvqa:
6 | annotations:
7 | train:
8 |         - okvqa/defaults/annotations/annotations/imdb_trainval_v10.npy
9 |       val:
10 |         - okvqa/defaults/annotations/annotations/imdb_test_v10.npy
11 |       test:
12 |         - okvqa/defaults/annotations/annotations/imdb_test_v10.npy
13 |
--------------------------------------------------------------------------------
/projects/krisp/requirements.txt:
--------------------------------------------------------------------------------
1 | networkx
2 | torch_geometric
3 | gensim
4 |
--------------------------------------------------------------------------------
/projects/lorra/configs/vqa2/train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | # Use soft copy
5 | dataset_config:
6 | vqa2_train_val:
7 | use_ocr: true
8 | processors:
9 | context_processor:
10 | type: fasttext
11 | params:
12 | download_initially: true
13 | max_length: 50
14 | model_file: wiki.en.bin
15 | answer_processor:
16 | type: soft_copy_answer
17 | params:
18 | vocab_file: vqa2/defaults/extras/vocabs/answers_vqa.txt
19 | preprocessor:
20 | type: simple_word
21 | params: {}
22 | context_preprocessor:
23 | type: simple_word
24 | params: {}
25 | max_length: 50
26 | num_answers: 10
27 |
--------------------------------------------------------------------------------
/projects/lorra/configs/vqa2/train_val_resnet_only.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | vqa2_train_val:
6 | use_images: false
7 | use_features: true
8 | zoo_requirements:
9 | - coco.resnet152
10 | - vqa2.defaults
11 | features:
12 | train:
13 | - coco/resnet152/features/trainval2014.lmdb
14 | - coco/resnet152/features/trainval2014.lmdb
15 | val:
16 | - coco/resnet152/features/trainval2014.lmdb
17 | test:
18 | - coco/resnet152/features/test2015.lmdb
19 | annotations:
20 | train:
21 | - vqa2/defaults/annotations/imdb_train2014.npy
22 | - vqa2/defaults/annotations/imdb_valminusminival2014.npy
23 | val:
24 | - vqa2/defaults/annotations/imdb_minival2014.npy
25 |
26 | model_config:
27 | lorra:
28 | image_feature_encodings:
29 | - type: default
30 | params: {}
31 |
--------------------------------------------------------------------------------
/projects/lxmert/configs/coco/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../defaults.yaml
3 | - ./masked.yaml
4 |
--------------------------------------------------------------------------------
/projects/lxmert/configs/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/lxmert/defaults.yaml
3 |
4 | optimizer:
5 | type: adam_w
6 | params:
7 | lr: 1e-4
8 | eps: 1e-8
9 |
10 | training:
11 | seed: 9595
12 | batch_size: 4
13 | lr_scheduler: false
14 | find_unused_parameters: true
15 | use_warmup: true
16 | warmup_factor: 0.05
17 | warmup_iterations: 1000
18 | max_epochs: 20
19 | max_updates: null
20 | pin_memory: true
21 |
22 |
23 | evaluation:
24 | metrics:
25 | - vqa_accuracy
26 |
--------------------------------------------------------------------------------
/projects/lxmert/configs/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 | - coco/masked.yaml
4 | - gqa/masked.yaml
5 | - visual_genome/masked.yaml
6 | - vqa2/masked.yaml
7 | - configs/models/lxmert/defaults.yaml
8 |
--------------------------------------------------------------------------------
/projects/lxmert/configs/visual_genome/masked.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | masked_visual_genome:
3 | use_features: true
4 | add_answer: true
5 | max_features: 36
6 | features:
7 | train:
8 | - visual_genome/detectron_fix_100/fc6/,visual_genome/resnet152/
9 | - visual_genome/detectron_fix_100/fc6/,visual_genome/resnet152/
10 | annotations:
11 | train:
12 | - imdb/visual_genome/vg_question_answers.jsonl
13 | - imdb/visual_genome/vg_question_answers_placeholder.jsonl
14 |
--------------------------------------------------------------------------------
/projects/lxmert/configs/visual_genome/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./masked.yaml
3 |
4 | optimizer:
5 | type: adam_w
6 | params:
7 | lr: 1e-4
8 | eps: 1e-8
9 |
10 | scheduler:
11 | type: warmup_linear
12 | params:
13 | num_warmup_steps: 1000
14 | num_training_steps: ${training.max_updates}
15 |
16 | training:
17 | batch_size: 480
18 | lr_scheduler: true
19 | # Don't forget to update schedule_attributes if you update this
20 | max_updates: 11000
21 |
--------------------------------------------------------------------------------
/projects/lxmert/configs/vqa2/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../defaults.yaml
3 | - ./masked.yaml
4 |
--------------------------------------------------------------------------------
/projects/m4c/README.md:
--------------------------------------------------------------------------------
1 | # Iterative Answer Prediction with Pointer-Augmented Multimodal Transformers for TextVQA
2 |
3 | This repository contains the code for M4C model from the following paper, released under the MMF:
4 |
5 | * R. Hu, A. Singh, T. Darrell, M. Rohrbach, *Iterative Answer Prediction with Pointer-Augmented Multimodal Transformers for TextVQA*. in CVPR, 2020 ([PDF](https://arxiv.org/pdf/1911.06258.pdf))
6 | ```
7 | @inproceedings{hu2020iterative,
8 | title={Iterative Answer Prediction with Pointer-Augmented Multimodal Transformers for TextVQA},
9 | author={Hu, Ronghang and Singh, Amanpreet and Darrell, Trevor and Rohrbach, Marcus},
10 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
11 | year={2020}
12 | }
13 | ```
14 |
15 | Please see [https://mmf.sh/docs/projects/m4c](https://mmf.sh/docs/projects/m4c) for more details on how to use m4c model.
16 |
--------------------------------------------------------------------------------
/projects/m4c/configs/textvqa/joint_with_stvqa.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | textvqa:
6 | use_images: false
7 | use_features: true
8 | zoo_requirements:
9 | - textvqa.defaults
10 | - textvqa.ocr_en
11 | - stvqa.defaults
12 | - stvqa.ocr_en
13 | features:
14 | train:
15 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb
16 | - stvqa/defaults/features/detectron.lmdb,stvqa/ocr_en/features/ocr_en_frcn_features.lmdb
17 | annotations:
18 | train:
19 | - textvqa/defaults/annotations/imdb_train_ocr_en.npy
20 | - stvqa/defaults/annotations/imdb_subtrain.npy
21 |
--------------------------------------------------------------------------------
/projects/m4c/configs/textvqa/ocr_ml.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | textvqa:
6 | zoo_requirements:
7 | - textvqa.defaults
8 | - textvqa.ocr_ml
9 | features:
10 | train:
11 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_ml/features/ocr_ml_frcn_features.lmdb
12 | val:
13 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_ml/features/ocr_ml_frcn_features.lmdb
14 | test:
15 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_ml/features/ocr_ml_frcn_features.lmdb
16 |
17 | annotations:
18 | train:
19 | - textvqa/defaults/annotations/imdb_train_ocr_ml.npy
20 | val:
21 | - textvqa/defaults/annotations/imdb_val_ocr_ml.npy
22 | test:
23 | - textvqa/defaults/annotations/imdb_test_ocr_ml.npy
24 |
--------------------------------------------------------------------------------
/projects/m4c/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/projects/m4c_captioner/README.md:
--------------------------------------------------------------------------------
1 | # TextCaps: a Dataset for Image Captioning with Reading Comprehension
2 |
3 | This repository contains the code for M4C-Captioner model from the following paper, released under the MMF.
4 |
5 | * O. Sidorov, R. Hu, M. Rohrbach, A. Singh, *TextCaps: a Dataset for Image Captioning with Reading Comprehension*. in ECCV, 2020 ([PDF](https://arxiv.org/pdf/2003.12462.pdf))
6 | ```
7 | @inproceedings{sidorov2019textcaps,
8 |   title={TextCaps: a Dataset for Image Captioningwith Reading Comprehension},
9 | author={Sidorov, Oleksii and Hu, Ronghang and Rohrbach, Marcus and Singh, Amanpreet},
10 | booktitle={European Conference on Computer Vision},
11 | year={2020}
12 | }
13 | ```
14 |
15 | Please see [https://mmf.sh/docs/projects/m4c_captioner](https://mmf.sh/docs/projects/m4c_captioner) for more details on how to use m4c_captioner model.
16 |
--------------------------------------------------------------------------------
/projects/m4c_captioner/configs/butd/textcaps:
--------------------------------------------------------------------------------
1 | ../../../butd/configs/textcaps
--------------------------------------------------------------------------------
/projects/m4c_captioner/configs/m4c_captioner/coco/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/datasets/coco/ocr_en.yaml
3 | optimizer:
4 | params:
5 | eps: 1.0e-08
6 | lr: 1e-4
7 | weight_decay: 0
8 | type: Adam
9 |
10 | evaluation:
11 | metrics:
12 | - textcaps_bleu4
13 |
14 | training:
15 | clip_norm_mode: all
16 | clip_gradients: true
17 | max_grad_l2_norm: 0.25
18 | lr_scheduler: true
19 | lr_steps:
20 | - 14000
21 | - 19000
22 | lr_ratio: 0.1
23 | use_warmup: true
24 | warmup_factor: 0.2
25 | warmup_iterations: 1000
26 | max_iterations: 24000
27 | batch_size: 128
28 | num_workers: 8
29 | early_stop:
30 | criteria: coco/textcaps_bleu4
31 | minimize: false
32 |
--------------------------------------------------------------------------------
/projects/m4c_captioner/configs/m4c_captioner/coco/eval_on_textcaps.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | textcaps:
6 | zoo_requirements:
7 | - textvqa.defaults
8 | - textvqa.ocr_en
9 | - textcaps.defaults
10 | use_images: false
11 | use_features: true
12 | features:
13 | val:
14 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb
15 | test:
16 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb
17 | annotations:
18 | val:
19 | - textcaps/defaults/annotations/imdb_val_filtered_by_image_id.npy # only one sample per image_id
20 | test:
21 | - textcaps/defaults/annotations/imdb_test_filtered_by_image_id.npy # only one sample per image_id
22 |
--------------------------------------------------------------------------------
/projects/m4c_captioner/configs/m4c_captioner/textcaps/with_caffe2_feat.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | textcaps:
6 | zoo_requirements:
7 | - textvqa.caffe2
8 | - textvqa.ocr_en
9 | - textcaps.defaults
10 | features:
11 | train:
12 | - textvqa/caffe2/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb
13 | val:
14 | - textvqa/caffe2/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb
15 | test:
16 | - textvqa/caffe2/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb
17 |
--------------------------------------------------------------------------------
/projects/m4c_captioner/configs/m4c_captioner/textcaps/without_ocr.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 | dataset_config:
4 | textcaps:
5 | use_ocr: False # remove all the OCRs from each image
6 |
--------------------------------------------------------------------------------
/projects/m4c_captioner/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/projects/mmbt/configs/hateful_memes/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/mmbt/classification.yaml
3 | - configs/datasets/hateful_memes/bert.yaml
4 |
5 | scheduler:
6 | type: warmup_linear
7 | params:
8 | num_warmup_steps: 2000
9 | num_training_steps: ${training.max_updates}
10 |
11 | optimizer:
12 | type: adam_w
13 | params:
14 | lr: 1e-5
15 | eps: 1e-8
16 |
17 | evaluation:
18 | metrics:
19 | - accuracy
20 | - binary_f1
21 | - roc_auc
22 |
23 | training:
24 | batch_size: 32
25 | lr_scheduler: true
26 | max_updates: 22000
27 | early_stop:
28 | criteria: hateful_memes/roc_auc
29 | minimize: false
30 |
31 | checkpoint:
32 | pretrained_state_mapping:
33 | bert: bert
34 |
--------------------------------------------------------------------------------
/projects/mmbt/configs/hateful_memes/hateful_with_refiner.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/mmbt/classification.yaml
3 | - configs/datasets/hateful_memes/bert.yaml
4 |
5 | scheduler:
6 | type: warmup_linear
7 | params:
8 | num_warmup_steps: 2000
9 | num_training_steps: ${training.max_updates}
10 |
11 | optimizer:
12 | type: adam_w
13 | params:
14 | lr: 1e-5
15 | eps: 1e-8
16 |
17 | evaluation:
18 | metrics:
19 | - accuracy
20 | - binary_f1
21 | - roc_auc
22 |
23 | training:
24 | batch_size: 32
25 | lr_scheduler: true
26 | max_updates: 22000
27 | early_stop:
28 | criteria: hateful_memes/roc_auc
29 | minimize: false
30 |
31 | checkpoint:
32 | pretrained_state_mapping:
33 | bert: bert
34 |
--------------------------------------------------------------------------------
/projects/mmbt/configs/hateful_memes/with_features.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 | - configs/models/mmbt/with_features.yaml
4 | - configs/datasets/hateful_memes/with_features.yaml
5 |
6 | optimizer:
7 | type: adam_w
8 | params:
9 | lr: 5e-5
10 | eps: 1e-8
11 |
--------------------------------------------------------------------------------
/projects/mmbt/configs/masked_coco/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/mmbt/pretrain.yaml
3 | - configs/models/mmbt/with_features.yaml
4 |
5 | scheduler:
6 | type: warmup_linear
7 | params:
8 | num_warmup_steps: 2000
9 | num_training_steps: ${training.max_updates}
10 |
11 | optimizer:
12 | type: adam_w
13 | params:
14 | lr: 5e-5
15 | eps: 1e-8
16 |
17 | training:
18 | batch_size: 128
19 | lr_scheduler: true
20 | max_updates: 22000
21 |
22 | checkpoint:
23 | pretrained_state_mapping:
24 | bert: bert
25 |
--------------------------------------------------------------------------------
/projects/mmbt/configs/mmimdb/with_features.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 | - configs/models/mmbt/with_features.yaml
4 | - configs/datasets/mmimdb/with_features.yaml
5 |
--------------------------------------------------------------------------------
/projects/mmbt/configs/okvqa/with_images.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/mmbt/classification.yaml
3 |
4 | scheduler:
5 | type: warmup_linear
6 | params:
7 | num_warmup_steps: 2000
8 | num_training_steps: ${training.max_updates}
9 |
10 | dataset_config:
11 | okvqa:
12 | processors:
13 | text_processor:
14 | type: bert_tokenizer
15 | params:
16 | tokenizer_config:
17 | type: bert-base-uncased
18 | params:
19 | do_lower_case: true
20 | mask_probability: 0
21 | max_seq_length: 128
22 |
23 | model_config:
24 | mmbt:
25 | losses:
26 | - logit_bce
27 | num_labels: 2253
28 |
29 | optimizer:
30 | type: adam_w
31 | params:
32 | lr: 1e-5
33 | eps: 1e-8
34 |
35 | evaluation:
36 | metrics:
37 | - vqa_accuracy
38 |
39 | training:
40 | batch_size: 32
41 | lr_scheduler: true
42 | max_updates: 22000
43 | early_stop:
44 | criteria: okvqa/vqa_accuracy
45 | minimize: false
46 |
47 | checkpoint:
48 | pretrained_state_mapping:
49 | bert: bert
50 |
--------------------------------------------------------------------------------
/projects/mmbt/configs/vqa2/with_raw_images.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 | - ../../../../mmf/configs/datasets/vqa2/with_raw_images.yaml
4 |
--------------------------------------------------------------------------------
/projects/mmf_transformer/configs/charades/direct.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/models/mmf_transformer/with_audio_video.yaml
3 |
4 | model_config:
5 | mmf_transformer:
6 | heads:
7 | - type: mlp
8 | num_labels: 157
9 |
10 | optimizer:
11 | type: adam_w
12 | params:
13 | lr: 5e-5
14 | eps: 1e-8
15 |
16 | scheduler:
17 | type: warmup_cosine
18 | params:
19 | num_warmup_steps: 2000
20 | num_training_steps: 60000
21 |
22 | evaluation:
23 | metrics:
24 | - multilabel_micro_f1
25 |
26 | training:
27 | batch_size: 8
28 | lr_scheduler: true
29 | # Don't forget to update schedule_attributes if you update this
30 | max_updates: 60000
31 | find_unused_parameters: true
32 | early_stop:
33 | criteria: charades/multilabel_micro_f1
34 | minimize: false
35 |
--------------------------------------------------------------------------------
/projects/mmf_transformer/configs/hateful_memes/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/datasets/hateful_memes/bert.yaml
3 |
4 | model_config:
5 | mmf_transformer:
6 | training_head_type: classification
7 | num_labels: 2
8 | losses:
9 | - cross_entropy
10 |
11 | scheduler:
12 | type: warmup_linear
13 | params:
14 | num_warmup_steps: 2000
15 | num_training_steps: ${training.max_updates}
16 |
17 | optimizer:
18 | type: adam_w
19 | params:
20 | lr: 1e-5
21 | eps: 1e-8
22 |
23 | evaluation:
24 | metrics:
25 | - accuracy
26 | - binary_f1
27 | - roc_auc
28 |
29 | training:
30 | batch_size: 32
31 | lr_scheduler: true
32 | max_updates: 22000
33 | early_stop:
34 | criteria: hateful_memes/roc_auc
35 | minimize: false
36 |
37 | checkpoint:
38 | pretrained_state_mapping:
39 | pooler: pooler
40 | backend.transformer: backend.transformer
41 | backend.embeddings: backend.embeddings
42 |
--------------------------------------------------------------------------------
/projects/mmf_transformer/configs/masked_coco/pretrain_itm.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | model_config:
5 | mmf_transformer:
6 | heads:
7 | - type: itm
8 | freeze: false
9 | lr_multiplier: 1.0
10 | # default for bert base
11 | hidden_size: 768
12 |
13 | dataset_config:
14 | masked_coco:
15 | return_features_info: true
16 | false_caption: true
17 | false_caption_probability: 0.1
18 |
--------------------------------------------------------------------------------
/projects/mmf_transformer/localized_narratives/masked.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/datasets/localized_narratives/masked.yaml
3 | - configs/models/mmf_transformer/pretrain.yaml
4 |
5 | optimizer:
6 | type: adam_w
7 | params:
8 | lr: 5e-5
9 | eps: 1e-8
10 | scheduler:
11 | type: warmup_linear
12 | params:
13 | num_warmup_steps: 1000
14 | num_training_steps: 11000
15 | training:
16 | batch_size: 2
17 | lr_scheduler: true
18 | # Don't forget to update schedule_attributes if you update this
19 | max_updates: 11000
20 |
--------------------------------------------------------------------------------
/projects/others/cnn_lstm/clevr/defaults.yaml:
--------------------------------------------------------------------------------
1 | optimizer:
2 | type: Adamax
3 | params:
4 | eps: 1.0e-08
5 | lr: 0.01
6 | weight_decay: 0
7 |
8 | evaluation:
9 | metrics:
10 | - accuracy
11 |
12 | training:
13 | batch_size: 128
14 | snapshot_interval: 6000
15 | early_stop:
16 | criteria: clevr/accuracy
17 | minimize: false
18 |
--------------------------------------------------------------------------------
/projects/others/cnn_lstm/hateful_memes/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | cnn_lstm:
3 | losses:
4 | - type: cross_entropy
5 | classifier:
6 | type: mlp
7 | params:
8 | in_dim: 190
9 | out_dim: 2
10 |
11 | scheduler:
12 | type: warmup_linear
13 | params:
14 | num_warmup_steps: 2000
15 | num_training_steps: ${training.max_updates}
16 |
17 | optimizer:
18 | type: adam_w
19 | params:
20 | lr: 5e-5
21 | eps: 1e-8
22 |
23 | evaluation:
24 | metrics:
25 | - accuracy
26 | - binary_f1
27 | - roc_auc
28 |
29 | training:
30 | batch_size: 480
31 | lr_scheduler: true
32 | max_updates: 60000
33 | early_stop:
34 | criteria: hateful_memes/roc_auc
35 | minimize: false
36 |
--------------------------------------------------------------------------------
/projects/others/concat_bert/hateful_memes/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/datasets/hateful_memes/bert.yaml
3 |
4 | model_config:
5 | concat_bert:
6 | classifier:
7 | type: mlp
8 | params:
9 | num_layers: 2
10 | losses:
11 | - type: cross_entropy
12 |
13 | scheduler:
14 | type: warmup_linear
15 | params:
16 | num_warmup_steps: 2000
17 | num_training_steps: ${training.max_updates}
18 |
19 | optimizer:
20 | type: adam_w
21 | params:
22 | lr: 1e-5
23 | eps: 1e-8
24 |
25 | evaluation:
26 | metrics:
27 | - accuracy
28 | - binary_f1
29 | - roc_auc
30 |
31 | training:
32 | batch_size: 64
33 | lr_scheduler: true
34 | max_updates: 22000
35 | early_stop:
36 | criteria: hateful_memes/roc_auc
37 | minimize: false
38 |
39 | checkpoint:
40 | pretrained_state_mapping:
41 | base: base
42 |
--------------------------------------------------------------------------------
/projects/others/concat_bow/hateful_memes/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | concat_bow:
3 | classifier:
4 | type: mlp
5 | params:
6 | num_layers: 2
7 | losses:
8 | - type: cross_entropy
9 |
10 | scheduler:
11 | type: warmup_linear
12 | params:
13 | num_warmup_steps: 2000
14 | num_training_steps: ${training.max_updates}
15 |
16 | optimizer:
17 | type: adam_w
18 | params:
19 | lr: 5e-5
20 | eps: 1e-8
21 |
22 | evaluation:
23 | metrics:
24 | - accuracy
25 | - binary_f1
26 | - roc_auc
27 |
28 | training:
29 | batch_size: 32
30 | lr_scheduler: true
31 | max_updates: 22000
32 | early_stop:
33 | criteria: hateful_memes/roc_auc
34 | minimize: false
35 |
36 | checkpoint:
37 | pretrained_state_mapping:
38 | base: base
39 |
--------------------------------------------------------------------------------
/projects/others/late_fusion/hateful_memes/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/datasets/hateful_memes/bert.yaml
3 |
4 | model_config:
5 | late_fusion:
6 | modal_classifier:
7 | type: mlp
8 | params:
9 | num_layers: 2
10 | text_classifier:
11 | type: mlp
12 | params:
13 | num_layers: 2
14 | losses:
15 | - type: cross_entropy
16 |
17 | scheduler:
18 | type: warmup_linear
19 | params:
20 | num_warmup_steps: 2000
21 | num_training_steps: ${training.max_updates}
22 |
23 | optimizer:
24 | type: adam_w
25 | params:
26 | lr: 5e-5
27 | eps: 1e-8
28 |
29 | evaluation:
30 | metrics:
31 | - accuracy
32 | - binary_f1
33 | - roc_auc
34 |
35 | training:
36 | batch_size: 64
37 | lr_scheduler: true
38 | max_updates: 22000
39 | early_stop:
40 | criteria: hateful_memes/roc_auc
41 | minimize: false
42 |
43 | checkpoint:
44 | pretrained_state_mapping:
45 | base: base
46 |
--------------------------------------------------------------------------------
/projects/others/mmf_bert/configs/masked_coco/defaults.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | masked_coco:
3 | return_features_info: true
4 |
5 | optimizer:
6 | type: adam_w
7 | params:
8 | lr: 5e-5
9 | eps: 1e-8
10 |
11 | scheduler:
12 | type: warmup_linear
13 | params:
14 | num_warmup_steps: 1000
15 | num_training_steps: 11000
16 |
17 | training:
18 | batch_size: 480
19 | lr_scheduler: true
20 | # Don't forget to update schedule_attributes if you update this
21 | max_updates: 11000
22 |
--------------------------------------------------------------------------------
/projects/others/mmf_bert/configs/masked_coco/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/others/mmf_bert/configs/masked_coco/pretrain_joint_vqa2.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 | - ../masked_vqa2/defaults.yaml
4 |
5 | model_config:
6 | mmf_bert:
7 | training_head_type: pretraining,vqa
8 |
9 | scheduler:
10 | type: warmup_linear
11 | params:
12 | num_warmup_steps: 3000
13 | num_training_steps: 33000
14 |
15 | training:
16 | max_updates: 34000
17 |
--------------------------------------------------------------------------------
/projects/others/mmf_bert/configs/masked_conceptual_captions/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../masked_coco/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | return_features_info: true
7 |
--------------------------------------------------------------------------------
/projects/others/mmf_bert/configs/masked_conceptual_captions/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/others/mmf_bert/configs/masked_vqa2/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../masked_coco/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_vqa2:
6 | annotations:
7 | train:
8 | - vqa2/defaults/annotations/imdb_train2014_len_coco.npy
9 | return_features_info: true
10 |
--------------------------------------------------------------------------------
/projects/others/mmf_bert/configs/masked_vqa2/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/others/mmf_bert/configs/visual_entailment/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | mmf_bert:
3 | training_head_type: visual_entailment
4 | losses:
5 | - type: cross_entropy
6 |
7 | dataset_config:
8 | visual_entailment:
9 | return_features_info: true
10 | processors:
11 | text_processor:
12 | type: bert_tokenizer
13 | params:
14 | tokenizer_config:
15 | type: bert-base-uncased
16 | params:
17 | do_lower_case: true
18 | mask_probability: 0
19 | max_seq_length: 128
20 |
21 | optimizer:
22 | type: adam_w
23 | params:
24 | lr: 5e-5
25 | eps: 1e-8
26 |
27 | scheduler:
28 | type: warmup_linear
29 | params:
30 | num_warmup_steps: 6000
31 | num_training_steps: 60000
32 |
33 | evaluation:
34 | metrics:
35 | - accuracy
36 |
37 | training:
38 | early_stop:
39 | criteria: visual_entailment/accuracy
40 | minimize: false
41 |
--------------------------------------------------------------------------------
/projects/others/unimodal/configs/hateful_memes/bert.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./text.yaml
3 | - configs/datasets/hateful_memes/bert.yaml
4 | - configs/models/unimodal/bert.yaml
5 |
6 | model_config:
7 | unimodal_text:
8 | classifier:
9 | type: mlp
10 | params:
11 | in_dim: 768
12 | num_layers: 2
13 |
14 | training:
15 | batch_size: 128
16 |
--------------------------------------------------------------------------------
/projects/others/unimodal/configs/hateful_memes/image.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | unimodal_image:
3 | classifier:
4 | type: mlp
5 | params:
6 | num_layers: 2
7 | losses:
8 | - type: cross_entropy
9 |
10 | scheduler:
11 | type: warmup_linear
12 | params:
13 | num_warmup_steps: 2000
14 | num_training_steps: ${training.max_updates}
15 |
16 | optimizer:
17 | type: adam_w
18 | params:
19 | lr: 1e-5
20 | eps: 1e-8
21 |
22 | evaluation:
23 | metrics:
24 | - accuracy
25 | - binary_f1
26 | - roc_auc
27 |
28 | training:
29 | batch_size: 32
30 | lr_scheduler: true
31 | max_updates: 22000
32 | early_stop:
33 | criteria: hateful_memes/roc_auc
34 | minimize: false
35 |
36 | checkpoint:
37 | pretrained_state_mapping:
38 | base: base
39 |
--------------------------------------------------------------------------------
/projects/others/unimodal/configs/hateful_memes/text.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | unimodal_text:
3 | classifier:
4 | type: mlp
5 | params:
6 | num_layers: 2
7 | losses:
8 | - type: cross_entropy
9 |
10 | scheduler:
11 | type: warmup_linear
12 | params:
13 | num_warmup_steps: 2000
14 | num_training_steps: ${training.max_updates}
15 |
16 | optimizer:
17 | type: adam_w
18 | params:
19 | lr: 5e-5
20 | eps: 1e-8
21 |
22 | evaluation:
23 | metrics:
24 | - accuracy
25 | - binary_f1
26 | - roc_auc
27 |
28 | training:
29 | batch_size: 32
30 | lr_scheduler: true
31 | max_updates: 22000
32 | early_stop:
33 | criteria: hateful_memes/roc_auc
34 | minimize: false
35 |
36 | checkpoint:
37 | pretrained_state_mapping:
38 | base: base
39 |
--------------------------------------------------------------------------------
/projects/others/unimodal/configs/hateful_memes/with_features.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./image.yaml
3 | - configs/datasets/hateful_memes/with_features.yaml
4 | - configs/models/unimodal/with_features.yaml
5 |
6 |
7 | optimizer:
8 | type: adam_w
9 | params:
10 | lr: 5e-5
11 | eps: 1e-8
12 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_coco/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../../vilbert/configs/masked_coco/defaults.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_coco/fifty_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_coco/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_coco:
6 | annotations:
7 | train:
8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_50_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_coco/full.yaml:
--------------------------------------------------------------------------------
1 | ../../../../vilbert/configs/masked_coco/pretrain.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_coco/ten_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_coco/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_coco:
6 | annotations:
7 | train:
8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_10_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../../vilbert/configs/masked_conceptual_captions/defaults.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/full.yaml:
--------------------------------------------------------------------------------
1 | ../../../../vilbert/configs/masked_conceptual_captions/pretrain.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/full_coco_generated.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | zoo_requirements:
7 | - cc.coco_generated
8 | - cc.defaults
9 | annotations:
10 | train:
11 | - cc/coco_generated/annotations/train_all.npy
12 | val:
13 | - cc/coco_generated/annotations/val.npy
14 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/half.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | annotations:
7 | train:
8 | - cc/defaults/annotations/train_mid.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/half_coco_generated.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | zoo_requirements:
7 | - cc.coco_generated
8 | - cc.defaults
9 | annotations:
10 | train:
11 | - cc/coco_generated/annotations/train_mid.npy
12 | val:
13 | - cc/coco_generated/annotations/val.npy
14 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | annotations:
7 | train:
8 | - cc/defaults/annotations/train_small.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small_coco_generated.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | zoo_requirements:
7 | - cc.coco_generated
8 | - cc.defaults
9 | annotations:
10 | train:
11 | - cc/coco_generated/annotations/train_small.npy
12 | val:
13 | - cc/coco_generated/annotations/val.npy
14 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small_fifty_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | annotations:
7 | train:
8 | - cc/defaults/annotations/train_small_50_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small_ten_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | annotations:
7 | train:
8 | - cc/defaults/annotations/train_small_10_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_vqa2/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../../vilbert/configs/masked_vqa2/defaults.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_vqa2/fifty_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_vqa2/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_vqa2:
6 | annotations:
7 | train:
8 | - vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_vqa2/full.yaml:
--------------------------------------------------------------------------------
1 | ../../../../vilbert/configs/masked_vqa2/pretrain.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/vilbert/masked_vqa2/ten_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/vilbert/configs/masked_vqa2/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_vqa2:
6 | return_features_info: true
7 | use_images: false
8 | use_features: true
9 | features:
10 | train:
11 | - coco/defaults/features/coco_trainval2014.lmdb
12 | annotations:
13 | train:
14 | - vqa2/defaults/annotations/imdb_train2014_len_coco_10_pc.npy
15 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_coco/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../../visual_bert/configs/masked_coco/defaults.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_coco/fifty_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_coco/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_coco:
6 | annotations:
7 | train:
8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_50_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_coco/full.yaml:
--------------------------------------------------------------------------------
1 | ../../../../visual_bert/configs/masked_coco/pretrain.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_coco/full_train_val.yaml:
--------------------------------------------------------------------------------
1 | ../../../../visual_bert/configs/masked_coco/pretrain_train_val.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_coco/ten_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_coco/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_coco:
6 | annotations:
7 | train:
8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_10_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../../visual_bert/configs/masked_conceptual_captions/defaults.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/full.yaml:
--------------------------------------------------------------------------------
1 | ../../../../visual_bert/configs/masked_conceptual_captions/pretrain.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/full_coco_generated.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | zoo_requirements:
7 | - cc.coco_generated
8 | - cc.defaults
9 | annotations:
10 | train:
11 | - cc/coco_generated/annotations/train_all.npy
12 | val:
13 | - cc/coco_generated/annotations/val.npy
14 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/half.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | annotations:
7 | train:
8 | - cc/defaults/annotations/train_mid.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/half_coco_generated.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | zoo_requirements:
7 | - cc.coco_generated
8 | - cc.defaults
9 |
10 | annotations:
11 | train:
12 | - cc/coco_generated/annotations/train_mid.npy
13 | val:
14 | - cc/coco_generated/annotations/val.npy
15 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | annotations:
7 | train:
8 | - cc/defaults/annotations/train_small.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small_coco_generated.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | zoo_requirements:
7 | - cc.coco_generated
8 | - cc.defaults
9 |
10 | annotations:
11 | train:
12 | - cc/coco_generated/annotations/train_small.npy
13 | val:
14 | - cc/coco_generated/annotations/val.npy
15 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small_fifty_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | annotations:
7 | train:
8 | - cc/defaults/annotations/train_small_50_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small_ten_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_conceptual_captions:
6 | annotations:
7 | train:
8 | - cc/defaults/annotations/train_small_10_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/defaults.yaml:
--------------------------------------------------------------------------------
1 | ../../../../visual_bert/configs/masked_vqa2/defaults.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/fifty_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_vqa2/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_vqa2:
6 | annotations:
7 | train:
8 | - vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/full.yaml:
--------------------------------------------------------------------------------
1 | ../../../../visual_bert/configs/masked_vqa2/pretrain.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/full_train_val.yaml:
--------------------------------------------------------------------------------
1 | ../../../../visual_bert/configs/masked_vqa2/pretrain_train_val.yaml
--------------------------------------------------------------------------------
/projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/ten_pc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/visual_bert/configs/masked_vqa2/pretrain.yaml
3 |
4 | dataset_config:
5 | masked_vqa2:
6 | annotations:
7 | train:
8 | - vqa2/defaults/annotations/imdb_train2014_len_coco_10_pc.npy
9 |
--------------------------------------------------------------------------------
/projects/pythia/configs/masked_q_vqa2/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./pythia.yaml
3 |
4 | evaluation:
5 | metrics:
6 | - accuracy
7 |
8 | training:
9 | early_stop:
10 | criteria: masked_q_vqa2/accuracy
11 | minimize: false
12 |
--------------------------------------------------------------------------------
/projects/pythia/configs/textvqa/defaults.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - configs/datasets/textvqa/with_resnet.yaml
3 | optimizer:
4 | type: Adamax
5 | params:
6 | lr: 0.005
7 |
8 | evaluation:
9 | metrics:
10 | - vqa_accuracy
11 |
12 | training:
13 | clip_norm_mode: all
14 | clip_gradients: false
15 | max_grad_l2_norm: 0.25
16 | # NOTE(review): clip_norm_mode and max_grad_l2_norm have no effect while
17 | # clip_gradients is false; the sibling vizwiz config enables clipping with
18 | # the same parameters — confirm whether `false` here is intentional.
16 | lr_scheduler: true
17 | lr_steps:
18 | - 14000
19 | lr_ratio: 0.01
20 | use_warmup: true
21 | warmup_factor: 0.2
22 | warmup_iterations: 1000
23 | max_updates: 24000
24 | batch_size: 128
25 | num_workers: 7
26 | task_size_proportional_sampling: true
27 | early_stop:
28 | criteria: textvqa/vqa_accuracy
29 | minimize: false
30 |
31 | checkpoint:
32 | pretrained_state_mapping:
33 | text_embeddings: text_embeddings
34 | image_feature_encoders: image_feature_encoders
35 | image_feature_embeddings_list: image_feature_embeddings_list
36 | image_text_multi_modal_combine_layer: image_text_multi_modal_combine_layer
37 |
--------------------------------------------------------------------------------
/projects/pythia/configs/visual_genome/defaults.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | visual_genome:
3 | return_scene_graph: false
4 | return_objects: false
5 | return_relationships: false
6 | return_features_info: false
7 | no_unk: true
8 |
9 | evaluation:
10 | metrics:
11 | - vqa_accuracy
12 |
13 | training:
14 | early_stop:
15 | criteria: visual_genome/vqa_accuracy
16 | minimize: false
17 |
--------------------------------------------------------------------------------
/projects/pythia/configs/vizwiz/defaults.yaml:
--------------------------------------------------------------------------------
1 | optimizer:
2 | type: Adamax
3 | params:
4 | lr: 0.005
5 |
6 | evaluation:
7 | metrics:
8 | - vqa_accuracy
9 |
10 | training:
11 | clip_norm_mode: all
12 | clip_gradients: true
13 | max_grad_l2_norm: 0.25
14 | lr_scheduler: true
15 | lr_steps:
16 | - 14000
17 | lr_ratio: 0.01
18 | use_warmup: true
19 | warmup_factor: 0.2
20 | warmup_iterations: 1000
21 | max_updates: 24000
22 | batch_size: 128
23 | num_workers: 7
24 | task_size_proportional_sampling: true
25 | early_stop:
26 | criteria: vizwiz/vqa_accuracy
27 | minimize: false
28 |
29 | checkpoint:
30 | pretrained_state_mapping:
31 | word_embedding: word_embedding
32 | text_embeddings: text_embeddings
33 | image_feature_encoders: image_feature_encoders
34 | image_feature_embeddings_list: image_feature_embeddings_list
35 | image_text_multi_modal_combine_layer: image_text_multi_modal_combine_layer
36 |
--------------------------------------------------------------------------------
/projects/pythia/configs/vqa2/debug.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | vqa2:
6 | use_images: false
7 | use_features: true
8 | features:
9 | train:
10 | - coco/defaults/features/trainval2014.lmdb
11 | val:
12 | - coco/defaults/features/trainval2014.lmdb
13 | annotations:
14 | train:
15 | - vqa2/defaults/annotations/imdb_debug.npy
16 | val:
17 | - vqa2/defaults/annotations/imdb_debug.npy
18 |
--------------------------------------------------------------------------------
/projects/pythia/configs/vqa2/resnet_only.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | vqa2:
6 | use_images: false
7 | use_features: true
8 | zoo_requirements:
9 | - coco.resnet152
10 | - vqa2.defaults
11 | features:
12 | train:
13 | - coco/resnet152/features/trainval2014.lmdb
14 | val:
15 | - coco/resnet152/features/trainval2014.lmdb
16 | test:
17 | - coco/resnet152/features/test2015.lmdb
18 | model_config:
19 | pythia:
20 | image_feature_encodings:
21 | - type: default
22 | params: {}
23 |
--------------------------------------------------------------------------------
/projects/pythia/configs/vqa2/train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | vqa2:
6 | use_images: false
7 | use_features: true
8 | features:
9 | train:
10 | - coco/defaults/features/trainval2014.lmdb,coco/resnet152/features/trainval2014.lmdb
11 | - coco/defaults/features/trainval2014.lmdb,coco/resnet152/features/trainval2014.lmdb
12 | val:
13 | - coco/defaults/features/trainval2014.lmdb,coco/resnet152/features/trainval2014.lmdb
14 | test:
15 | - coco/defaults/features/test2015.lmdb,coco/resnet152/features/test2015.lmdb
16 | annotations:
17 | train:
18 | - vqa2/defaults/annotations/imdb_train2014.npy
19 | - vqa2/defaults/annotations/imdb_val2014.npy
20 | val:
21 | - vqa2/defaults/annotations/imdb_val2014.npy
22 |
--------------------------------------------------------------------------------
/projects/pythia/configs/vqa2/train_val_resnet_only.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./resnet_only.yaml
3 |
4 | dataset_config:
5 | vqa2:
6 | use_images: false
7 | use_features: true
8 | features:
9 | train:
10 | - coco/resnet152/features/trainval2014.lmdb
11 | - coco/resnet152/features/trainval2014.lmdb
12 | val:
13 | - coco/resnet152/features/trainval2014.lmdb
14 | test:
15 | - coco/resnet152/features/test2015.lmdb
16 | annotations:
17 | train:
18 | - vqa2/defaults/annotations/imdb_train2014.npy
19 | - vqa2/defaults/annotations/imdb_valminusminival2014.npy
20 | val:
21 | - vqa2/defaults/annotations/imdb_minival2014.npy
22 |
--------------------------------------------------------------------------------
/projects/unit/README.md:
--------------------------------------------------------------------------------
1 | # UniT: Multimodal Multitask Learning with a Unified Transformer
2 |
3 | This repository contains the code for the UniT model from the following paper, released as part of MMF:
4 |
5 | - R. Hu, A. Singh. _UniT: Multimodal Multitask Learning with a Unified Transformer_. arXiv preprint arXiv:2102.10772, 2021 ([PDF](https://arxiv.org/pdf/2102.10772.pdf))
6 |
7 | ```
8 | @article{hu2021unit,
9 | title={UniT: Multimodal multitask learning with a unified transformer},
10 | author={Hu, Ronghang and Singh, Amanpreet},
11 | journal={arXiv preprint arXiv:2102.10772},
12 | year={2021}
13 | }
14 | ```
15 |
16 | Please see [https://mmf.sh/docs/projects/unit](https://mmf.sh/docs/projects/unit) for more details on how to use the UniT model.
17 |
--------------------------------------------------------------------------------
/projects/unit/configs/all_8_datasets/separate_dec.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./shared_dec.yaml
3 |
4 | model_config:
5 | unit:
6 | base_args:
7 | share_decoders: false
8 |
9 | optimizer:
10 | type: adam_w # HuggingFace transformer's AdamW
11 |
--------------------------------------------------------------------------------
/projects/unit/configs/all_8_datasets/shared_dec_without_task_embedding.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./shared_dec.yaml
3 |
4 | model_config:
5 | unit:
6 | base_args:
7 | use_task_embedding_in_img_encoder: false
8 | use_task_embedding_in_lang_encoder: false
9 |
--------------------------------------------------------------------------------
/projects/unit/configs/coco/single_task_without_task_embedding.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./single_task.yaml
3 |
4 | model_config:
5 | unit:
6 | base_args:
7 | use_task_embedding_in_img_encoder: false
8 | use_task_embedding_in_lang_encoder: false
9 |
--------------------------------------------------------------------------------
/projects/unit/configs/coco_vg_vqa2/separate_dec.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./shared_dec.yaml
3 |
4 | model_config:
5 | unit:
6 | base_args:
7 | share_decoders: false
8 |
9 | optimizer:
10 | type: adam_w # HuggingFace transformer's AdamW
11 |
--------------------------------------------------------------------------------
/projects/unit/configs/coco_vqa2/separate_dec.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./shared_dec.yaml
3 |
4 | model_config:
5 | unit:
6 | base_args:
7 | share_decoders: false
8 |
9 | optimizer:
10 | type: adam_w # HuggingFace transformer's AdamW
11 |
--------------------------------------------------------------------------------
/projects/unit/configs/vg_vqa2/separate_dec.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./shared_dec.yaml
3 |
4 | model_config:
5 | unit:
6 | base_args:
7 | share_decoders: false
8 |
9 | optimizer:
10 | type: adam_w # HuggingFace transformer's AdamW
11 |
--------------------------------------------------------------------------------
/projects/unit/configs/visual_entailment_dataset_cfg.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | visual_entailment:
3 | zoo_requirements:
4 | - visual_entailment.defaults
5 | - flickr30k.defaults
6 | use_features: false
7 | use_images: true
8 | processors:
9 | image_processor:
10 | type: torchvision_transforms
11 | params:
12 | transforms:
13 | - type: ResizeShortest
14 | params:
15 | min_size: 800
16 | max_size: 1333
17 | - ToTensor
18 | - type: Normalize
19 | params:
20 | mean: [0.485, 0.456, 0.406]
21 | std: [0.229, 0.224, 0.225]
22 | text_processor:
23 | type: bert_tokenizer
24 | params:
25 | tokenizer_config:
26 | type: bert-base-uncased
27 | params:
28 | do_lower_case: true
29 | mask_probability: 0
30 | max_seq_length: 25
31 |
--------------------------------------------------------------------------------
/projects/uniter/README.md:
--------------------------------------------------------------------------------
1 | # UNITER
2 |
3 | This repository contains the code for the PyTorch implementation of the UNITER model, released originally under this ([repo](https://github.com/ChenRocks/UNITER/)). Please cite the following papers if you are using the UNITER model from mmf:
4 |
5 | * Chen, Y.-C., Li, L., Yu, L., Kholy, A. E., Ahmed, F., Gan,
6 | Z., Cheng, Y., and Liu, J. *UNITER: Universal image-text representation learning.* In European Conference on
7 | Computer Vision, 2020b. ([arXiv](https://arxiv.org/pdf/1909.11740))
8 | ```
9 | @inproceedings{chen2020uniter,
10 | title={Uniter: Universal image-text representation learning},
11 | author={Chen, Yen-Chun and Li, Linjie and Yu, Licheng and Kholy, Ahmed El and Ahmed, Faisal and Gan, Zhe and Cheng, Yu and Liu, Jingjing},
12 | booktitle={ECCV},
13 | year={2020}
14 | }
15 | ```
16 |
17 |
18 | Please see [https://mmf.sh/docs/projects/uniter](https://mmf.sh/docs/projects/uniter) for more details on how to use the UNITER model.
19 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/hateful_memes/direct.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/hateful_memes/from_cc.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | checkpoint:
5 | resume_pretrained: true
6 | resume_zoo: vilbert.pretrained.cc.original
7 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/masked_coco/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | vilbert:
3 | training_head_type: pretraining
4 |
5 | dataset_config:
6 | masked_coco:
7 | return_features_info: true
8 | use_image_feature_masks: true
9 |
10 | optimizer:
11 | type: adam_w
12 | params:
13 | lr: 5e-5
14 | eps: 1e-8
15 |
16 | scheduler:
17 | type: warmup_linear
18 | params:
19 | num_warmup_steps: 1000
20 | num_training_steps: 11000
21 |
22 | training:
23 | batch_size: 480
24 | lr_scheduler: true
25 | # Don't forget to update schedule_attributes if you update this
26 | max_updates: 11000
27 | find_unused_parameters: true
28 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/masked_coco/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/masked_coco/pretrain_train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | masked_coco:
6 | return_features_info: true
7 | use_images: false
8 | use_features: true
9 | features:
10 | train:
11 | - coco/defaults/features/coco_trainval2014.lmdb
12 | - coco/defaults/features/coco_trainval2014.lmdb
13 | annotations:
14 | train:
15 | - coco/defaults/annotations/imdb_karpathy_train_by_image.npy
16 | - coco/defaults/annotations/imdb_karpathy_val_by_image.npy
17 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/masked_conceptual_captions/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | vilbert:
3 | training_head_type: pretraining
4 |
5 | dataset_config:
6 | masked_conceptual_captions:
7 | return_features_info: true
8 | use_image_feature_masks: true
9 |
10 | optimizer:
11 | type: adam_w
12 | params:
13 | lr: 5e-5
14 | eps: 1e-8
15 |
16 | scheduler:
17 | type: warmup_linear
18 | params:
19 | num_warmup_steps: 1000
20 | num_training_steps: 11000
21 |
22 | training:
23 | batch_size: 480
24 | lr_scheduler: true
25 | # Don't forget to update schedule_attributes if you update this
26 | max_updates: 11000
27 | find_unused_parameters: true
28 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/masked_vqa2/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | vilbert:
3 | training_head_type: pretraining
4 |
5 | dataset_config:
6 | masked_vqa2:
7 | annotations:
8 | train:
9 | - vqa2/defaults/annotations/imdb_train2014.npy
10 | return_features_info: true
11 | use_image_feature_masks: true
12 |
13 | optimizer:
14 | type: adam_w
15 | params:
16 | lr: 5e-5
17 | eps: 1e-8
18 |
19 | scheduler:
20 | type: warmup_linear
21 | params:
22 | num_warmup_steps: 1000
23 | num_training_steps: 11000
24 |
25 | training:
26 | batch_size: 480
27 | lr_scheduler: true
28 | # Don't forget to update schedule_attributes if you update this
29 | max_updates: 11000
30 | find_unused_parameters: true
31 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/masked_vqa2/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/masked_vqa2/pretrain_train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | masked_vqa2:
6 | use_images: false
7 | use_features: true
8 | features:
9 | train:
10 | - coco/defaults/features/coco_trainval2014.lmdb
11 | - coco/defaults/features/coco_trainval2014.lmdb
12 | annotations:
13 | train:
14 | - vqa2/defaults/annotations/imdb_train2014.npy
15 | - vqa2/defaults/annotations/imdb_val2014.npy
16 | return_features_info: true
17 | use_image_feature_masks: true
18 |
--------------------------------------------------------------------------------
/projects/vilbert/configs/mmimdb/pretrain.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/projects/vilbert/configs/mmimdb/pretrain.yaml
--------------------------------------------------------------------------------
/projects/vilbert/configs/vqa2/train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | vqa2:
6 | use_images: false
7 | use_features: true
8 | features:
9 | train:
10 | - coco/defaults/features/coco_trainval2014.lmdb
11 | - coco/defaults/features/coco_trainval2014.lmdb
12 | annotations:
13 | train:
14 | - vqa2/defaults/annotations/imdb_train2014.npy
15 | - vqa2/defaults/annotations/imdb_val2014.npy
16 | return_features_info: true
17 |
--------------------------------------------------------------------------------
/projects/vilt/README.md:
--------------------------------------------------------------------------------
1 | # ViLT
2 |
3 | This repository contains the code for pytorch implementation of ViLT model, released originally under this ([repo](https://github.com/dandelin/ViLT)). Please cite the following papers if you are using ViLT model from mmf:
4 |
5 | * Wonjae Kim, Bokyung Son, and Ildoo Kim. 2021. *ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision*. In 38th International Conference on Machine Learning (ICML). ([arXiv](https://arxiv.org/pdf/2102.03334))
6 | ```
7 | @misc{kim2021vilt,
8 | title={ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision},
9 | author={Wonjae Kim and Bokyung Son and Ildoo Kim},
10 | year={2021},
11 | eprint={2102.03334},
12 | archivePrefix={arXiv},
13 | primaryClass={stat.ML}
14 | }
15 | ```
16 |
17 | Please see [https://mmf.sh/docs/projects/vilt](https://mmf.sh/docs/projects/vilt) for more details on how to use the ViLT model.
18 |
--------------------------------------------------------------------------------
/projects/vilt/configs/vqa2/vit_b16_224.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/unit/configs/vqa2_dataset_cfg.yaml
3 | - ./defaults.yaml
4 |
5 | other_configs:
6 | image_w: 224
7 | image_h: 224
8 | hidden_dim: 768
9 |
10 | model_config:
11 | vilt:
12 | image_encoder:
13 | type: vit
14 | params:
15 | random_init: False
16 | pretrained_model_name: google/vit-base-patch16-224
17 | image_size:
18 | - ${other_configs.image_w}
19 | - ${other_configs.image_h}
20 | hidden_dim: ${other_configs.hidden_dim}
21 | pretrained_model: vit_base_patch16_224
22 | mlp_dim: 3072
23 |
24 | text_embeddings:
25 | type: vilt_text_embedding
26 | bert_model_name: bert-base-uncased
27 | hidden_dim: ${other_configs.hidden_dim}
28 | hidden_size: 768
29 | max_position_embeddings: 512
30 | random_init: False
31 |
--------------------------------------------------------------------------------
/projects/vilt/configs/vqa2/vit_b32_384.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../projects/unit/configs/vqa2_dataset_cfg.yaml
3 | - ./defaults.yaml
4 |
5 | other_configs:
6 | image_w: 384
7 | image_h: 384
8 | hidden_dim: 768
9 |
10 | model_config:
11 | vilt:
12 | image_encoder:
13 | type: vit
14 | params:
15 | random_init: False
16 | pretrained_model_name: google/vit-base-patch32-384
17 | image_size:
18 | - ${other_configs.image_w}
19 | - ${other_configs.image_h}
20 | hidden_dim: ${other_configs.hidden_dim}
21 | pretrained_model: vit_base_patch32_384
22 | mlp_dim: 3072
23 |
24 | text_embeddings:
25 | type: vilt_text_embedding
26 | bert_model_name: bert-base-uncased
27 | hidden_dim: ${other_configs.hidden_dim}
28 | hidden_size: 768
29 | max_position_embeddings: 512
30 | random_init: False
31 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/hateful_memes/direct.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | training:
5 | batch_size: 128
6 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/hateful_memes/from_coco.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | checkpoint:
5 | resume_pretrained: true
6 | resume_zoo: visual_bert.pretrained.coco
7 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/localized_narratives/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | visual_bert:
3 | hidden_size: 768
4 | hidden_dropout_prob: 0.1
5 | training_head_type: classification
6 | num_labels: 3129
7 |
8 | dataset_config:
9 | masked_localized_narratives:
10 | return_features_info: true
11 |
12 | optimizer:
13 | type: adam_w
14 | params:
15 | lr: 5e-5
16 | eps: 1e-8
17 |
18 | scheduler:
19 | type: warmup_linear
20 | params:
21 | num_warmup_steps: 1000
22 | num_training_steps: 11000
23 |
24 | training:
25 | batch_size: 32
26 | lr_scheduler: true
27 | num_workers: 0
28 | # Don't forget to update schedule_attributes if you update this
29 | max_updates: 88000
30 | find_unused_parameters: true
31 |
32 | checkpoint:
33 | pretrained_state_mapping:
34 | model.bert: model.bert
35 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/localized_narratives/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ../../../../mmf/configs/datasets/coco2017/masked.yaml
3 | - ../../../../mmf/configs/datasets/flickr30k/masked.yaml
4 | - ../../../../mmf/configs/datasets/localized_narratives/masked.yaml
5 |
6 | model_config:
7 | visual_bert:
8 | training_head_type: pretraining
9 |
10 | optimizer:
11 | type: adam_w
12 | params:
13 | lr: 5e-5
14 | eps: 1e-8
15 |
16 | scheduler:
17 | type: warmup_linear
18 | params:
19 | num_warmup_steps: 1000
20 | num_training_steps: 11000
21 |
22 | training:
23 | batch_size: 32
24 | lr_scheduler: true
25 | num_workers: 0
26 | # Don't forget to update schedule_attributes if you update this
27 | max_updates: 88000
28 | find_unused_parameters: true
29 |
30 | checkpoint:
31 | pretrained_state_mapping:
32 | model.bert: model.bert
33 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_coco/defaults.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | masked_coco:
3 | return_features_info: true
4 |
5 | optimizer:
6 | type: adam_w
7 | params:
8 | lr: 5e-5
9 | eps: 1e-8
10 |
11 | scheduler:
12 | type: warmup_linear
13 | params:
14 | num_warmup_steps: 1000
15 | num_training_steps: 11000
16 |
17 | training:
18 | batch_size: 480
19 | lr_scheduler: true
20 | # Don't forget to update schedule_attributes if you update this
21 | max_updates: 11000
22 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_coco/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_coco/pretrain_train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | masked_coco:
6 | return_features_info: true
7 | use_images: false
8 | use_features: true
9 | features:
10 | train:
11 | - coco/defaults/features/trainval2014.lmdb
12 | - coco/defaults/features/trainval2014.lmdb
13 | annotations:
14 | train:
15 | - coco/defaults/annotations/imdb_karpathy_train_by_image.npy
16 | - coco/defaults/annotations/imdb_karpathy_val_by_image.npy
17 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_conceptual_captions/defaults.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | masked_conceptual_captions:
3 | return_features_info: true
4 |
5 | optimizer:
6 | type: adam_w
7 | params:
8 | lr: 5e-5
9 | eps: 1e-8
10 |
11 | scheduler:
12 | type: warmup_linear
13 | params:
14 | num_warmup_steps: 1000
15 | num_training_steps: 11000
16 |
17 | training:
18 | batch_size: 480
19 | lr_scheduler: true
20 | # Don't forget to update schedule_attributes if you update this
21 | max_updates: 11000
22 | find_unused_parameters: true
23 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_gqa/defaults.yaml:
--------------------------------------------------------------------------------
1 | optimizer:
2 | type: adam_w
3 | params:
4 | lr: 5e-5
5 | eps: 1e-8
6 |
7 | scheduler:
8 | type: warmup_linear
9 | params:
10 | num_warmup_steps: 2000
11 | num_training_steps: 88000
12 |
13 | training:
14 | batch_size: 480
15 | lr_scheduler: true
16 | # Don't forget to update schedule_attributes if you update this
17 | max_updates: 88000
18 | find_unused_parameters: true
19 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_sbu/defaults.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | visual_bert:
3 | bert_model_name: bert-base-uncased
4 | training_head_type: pretraining
5 | visual_embedding_dim: 2048
6 | special_visual_initialize: true
7 | hard_cap_seq_len: null
8 | cut_first: text
9 | embedding_strategy: plain
10 | bypass_transformer: false
11 | output_attentions: false
12 | output_hidden_states: false
13 | text_only: false
14 | random_initialize: false
15 |
16 | dataset_config:
17 | masked_sbu:
18 | return_features_info: true
19 |
20 | optimizer:
21 | type: adam_w
22 | params:
23 | lr: 5e-5
24 | eps: 1e-8
25 |
26 | scheduler:
27 | type: warmup_linear
28 | params:
29 | num_warmup_steps: 1000
30 | num_training_steps: 11000
31 |
32 | training:
33 | batch_size: 480
34 | lr_scheduler: true
35 | # Don't forget to update schedule_attributes if you update this
36 | max_updates: 11000
37 | find_unused_parameters: true
38 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_sbu/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_vqa2/defaults.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | masked_vqa2:
3 | annotations:
4 | train:
5 | - vqa2/defaults/annotations/imdb_train2014.npy
6 | return_features_info: true
7 |
8 | optimizer:
9 | type: adam_w
10 | params:
11 | lr: 5e-5
12 | eps: 1e-8
13 |
14 | scheduler:
15 | type: warmup_linear
16 | params:
17 | num_warmup_steps: 1000
18 | num_training_steps: 11000
19 |
20 | training:
21 | batch_size: 480
22 | lr_scheduler: true
23 | # Don't forget to update schedule_attributes if you update this
24 | max_updates: 11000
25 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_vqa2/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/masked_vqa2/pretrain_train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | masked_vqa2:
6 | use_images: false
7 | use_features: true
8 | features:
9 | train:
10 | - coco/defaults/features/coco_trainval2014.lmdb
11 | - coco/defaults/features/coco_trainval2014.lmdb
12 | annotations:
13 | train:
14 | - vqa2/defaults/annotations/imdb_train2014.npy
15 | - vqa2/defaults/annotations/imdb_val2014.npy
16 | return_features_info: true
17 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/mmimdb/pretrain.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | masked_mmimdb:
6 | return_features_info: true
7 |
8 | model_config:
9 | visual_bert:
10 | training_head_type: pretraining
11 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/vizwiz/train_val.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | vizwiz:
3 | return_features_info: true
4 | use_images: false
5 | use_features: true
6 | features:
7 | train:
8 | - vizwiz/v2019/features/detectron.lmdb
9 | - vizwiz/v2019/features/detectron.lmdb
10 | val:
11 | - vizwiz/v2019/features/detectron.lmdb
12 | test:
13 | - vizwiz/v2019/features/detectron.lmdb
14 | annotations:
15 | train:
16 | - datasets/vizwiz/imdbs/imdb_vizwiz_train.npy
17 | - datasets/vizwiz/imdbs/imdb_vizwiz_val.npy
18 | processors:
19 | # Stop fasttext from loading by overriding the context_processor
20 | context_processor:
21 | type: simple_word
22 | params: {}
23 | text_processor:
24 | type: bert_tokenizer
25 | params:
26 | tokenizer_config:
27 | type: bert-base-uncased
28 | params:
29 | do_lower_case: true
30 | mask_probability: 0
31 | max_seq_length: 128
32 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/vqa2/train_val.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 |
4 | dataset_config:
5 | vqa2:
6 | use_images: false
7 | use_features: true
8 | features:
9 | train:
10 | - coco/defaults/features/coco_trainval2014.lmdb
11 | - coco/defaults/features/coco_trainval2014.lmdb
12 | annotations:
13 | train:
14 | - vqa2/defaults/annotations/imdb_train2014.npy
15 | - vqa2/defaults/annotations/imdb_val2014.npy
16 | return_features_info: true
17 | processors:
18 | text_processor:
19 | type: bert_tokenizer
20 | params:
21 | tokenizer_config:
22 | type: bert-base-uncased
23 | params:
24 | do_lower_case: true
25 | mask_probability: 0
26 | max_seq_length: 128
27 |
--------------------------------------------------------------------------------
/projects/visual_bert/configs/vqa2/with_raw_images.yaml:
--------------------------------------------------------------------------------
1 | includes:
2 | - ./defaults.yaml
3 | - ../../../../mmf/configs/datasets/vqa2/with_raw_images.yaml
4 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.11.0
2 | torchaudio==0.11.0
3 | torchvision==0.12.0
4 | numpy>=1.16.6, <=1.21.4
5 | tqdm>=4.43.0,<4.50.0
6 | torchtext==0.12.0
7 | GitPython==3.1.30
8 | requests==2.23.0
9 | fasttext==0.9.1
10 | nltk==3.6.6
11 | editdistance==0.5.3
12 | transformers>=3.4.0, <=4.10.1
13 | sklearn==0.0
14 | omegaconf>=2.0.6, <=2.1
15 | lmdb==0.98
16 | termcolor==1.1.0
17 | iopath==0.1.8
18 | datasets==1.2.1
19 | matplotlib==3.3.4
20 | pycocotools==2.0.2
21 | ftfy==5.8
22 | pytorch-lightning==1.6.0
23 | psutil
24 | pillow==9.3.0
25 | sentencepiece
26 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.utils.patch import patch_transformers
3 |
4 |
5 | patch_transformers()
6 |
--------------------------------------------------------------------------------
/tests/common/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/common/test_meter.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import unittest
3 |
4 | import torch
5 | from mmf.common.meter import Meter
6 | from mmf.common.report import Report
7 | from mmf.common.sample import SampleList
8 |
9 |
class TestMeter(unittest.TestCase):
    def test_meter_update_from_report(self):
        """Meter should average the loss across successive report updates."""
        meter = Meter()
        batch = SampleList(
            {"targets": torch.tensor([1, 2, 3, 4]), "dataset_type": "val"}
        )
        # Feed five reports with losses 0..4; their running average is 2.0.
        for loss_value in range(5):
            output = {
                "scores": torch.tensor([0, 1, 2, 3]),
                "losses": {"loss": float(loss_value)},
            }
            meter.update_from_report(Report(batch, output))

        self.assertEqual(meter.loss.global_avg, 2.0)
        self.assertEqual(meter.loss.avg, 2.0)
26 |
--------------------------------------------------------------------------------
/tests/configs/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/data/user_dir/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # flake8: noqa: F401
3 |
4 | from . import datasets, models
5 |
--------------------------------------------------------------------------------
/tests/data/user_dir/configs/always_one.yaml:
--------------------------------------------------------------------------------
1 | dataset_config:
2 | always_one: {}
3 |
--------------------------------------------------------------------------------
/tests/data/user_dir/configs/experiment.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | simple:
3 | losses:
4 | - type: cross_entropy
5 |
6 | optimizer:
7 | type: SGD
8 | params:
9 | lr: 1e-3
10 |
11 | evaluation:
12 | metrics:
13 | - accuracy
14 |
15 | training:
16 | batch_size: 8
17 | lr_scheduler: false
18 | max_updates: 50
19 | early_stop:
20 | criteria: always_one/accuracy
21 | minimize: false
22 | log_format: json
23 |
--------------------------------------------------------------------------------
/tests/data/user_dir/configs/simple.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | simple:
3 | in_dim: 1
4 | data_item_key: input
5 |
--------------------------------------------------------------------------------
/tests/data/user_dir/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # flake8: noqa: F401
3 |
4 | from . import always_one
5 |
--------------------------------------------------------------------------------
/tests/data/user_dir/datasets/always_one.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.common.registry import registry
4 | from mmf.datasets.base_dataset_builder import BaseDatasetBuilder
5 | from tests.test_utils import NumbersDataset
6 |
7 |
8 | DATASET_LEN = 20
9 |
10 |
@registry.register_builder("always_one")
class AlwaysOneBuilder(BaseDatasetBuilder):
    """Builder for the synthetic ``always_one`` test dataset.

    Loads a ``NumbersDataset`` configured so targets are always one; used to
    exercise user_dir builder registration and the ``always_one/accuracy``
    early-stop criteria in the test experiment config.
    """

    def __init__(self):
        super().__init__("always_one")

    # Fixed: parameter was `**Kwargs` (capitalized), violating PEP 8 and
    # inconsistent with `load(..., **kwargs)` below.
    def build(self, *args, **kwargs):
        # Nothing to download or preprocess for this synthetic dataset.
        pass

    @classmethod
    def config_path(cls):
        # Relative to this user_dir package root.
        return "configs/always_one.yaml"

    def load(self, config, dataset_type="train", *args, **kwargs):
        """Construct the in-memory dataset for the requested split."""
        dataset = NumbersDataset(DATASET_LEN, data_item_key="input", always_one=True)
        dataset.dataset_name = self.dataset_name
        dataset.dataset_type = dataset_type
        return dataset
28 |
--------------------------------------------------------------------------------
/tests/data/user_dir/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # flake8: noqa: F401
3 |
4 | from . import simple
5 |
--------------------------------------------------------------------------------
/tests/data/user_dir/models/simple.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | from mmf.common.registry import registry
4 | from tests.test_utils import SimpleModel
5 |
6 |
@registry.register_model("simple")
class CustomSimpleModel(SimpleModel):
    """Registers the test SimpleModel under the name "simple"."""

    @classmethod
    def config_path(cls):
        # Relative to this user_dir package root.
        return "configs/simple.yaml"

    def forward(self, sample_list):
        # Score the sample list's "input" field with the inherited classifier.
        logits = self.classifier(sample_list.input)
        return {"scores": logits}
15 |
--------------------------------------------------------------------------------
/tests/data/vocab.txt:
--------------------------------------------------------------------------------
1 | a
2 | man
3 | with
4 | red
5 | helmet
6 | on
7 | small
8 | moped
9 | dirt
10 | road
11 | riding
12 | motor
13 | bike
14 | the
15 | countryside
16 | back
17 | of
18 | motorcycle
19 |
--------------------------------------------------------------------------------
/tests/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/datasets/test_prediction_processors.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import unittest
4 |
5 | import torch
6 | from mmf.common.report import Report
7 | from mmf.common.sample import SampleList
8 | from mmf.datasets.processors.prediction_processors import ArgMaxPredictionProcessor
9 |
10 |
class TestDatasetProcessors(unittest.TestCase):
    def setUp(self):
        # Fixed seed so the torch.rand scores below are reproducible.
        torch.manual_seed(1234)

    def test_argmax_prediction_processor(self):
        """Processor emits one {id, answer} dict per row, answer = argmax index."""
        processor = ArgMaxPredictionProcessor(config={})
        ids = torch.tensor([1, 2, 3, 4, 5], dtype=torch.long)
        report = Report(SampleList({"id": ids}), {"scores": torch.rand(5, 4)})

        predictions = processor(report)

        # Expected argmax indices for seed 1234.
        expected = [
            {"id": row + 1, "answer": answer}
            for row, answer in enumerate([1, 1, 2, 1, 3])
        ]
        self.assertEqual(predictions, expected)
29 |
--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/models/interfaces/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/models/test_albef.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import unittest
3 |
4 | import torch
5 | from mmf.models.albef.vit import AlbefVitEncoder
6 | from omegaconf import OmegaConf
7 | from tests.test_utils import setup_proxy
8 | from torch import nn
9 |
10 |
class TestAlbefEncoders(unittest.TestCase):
    def setUp(self):
        setup_proxy()

    def _test_init(self, cls, **params):
        # Helper: build via from_params and verify it is a torch module.
        instance = cls.from_params(**params)
        self.assertIsInstance(instance, nn.Module)

    def test_vision_transformer(self):
        """Encoder output's last dim should match the configured out_dim."""
        config = OmegaConf.structured(AlbefVitEncoder.Config())
        encoder = AlbefVitEncoder(config)
        image_batch = torch.rand((1, 3, 224, 224))
        features = encoder(image_batch)
        self.assertEqual(features.size(-1), config.out_dim)
25 |
--------------------------------------------------------------------------------
/tests/models/transformers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/modules/test_hf_layers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import unittest
4 |
5 | from mmf.modules.hf_layers import replace_with_jit, undo_replace_with_jit
6 |
7 | try:
8 | from transformers3.modeling_bert import BertSelfAttention
9 | except ImportError:
10 | from transformers.modeling_bert import BertSelfAttention
11 |
12 |
class TestHFLayers(unittest.TestCase):
    def test_undo_replace_with_jit(self):
        """undo_replace_with_jit must restore the exact original forward."""
        saved_forward = BertSelfAttention.forward
        replace_with_jit()
        undo_replace_with_jit()
        # Identity (not equality): the very same function object must be back.
        self.assertIs(BertSelfAttention.forward, saved_forward)
19 |
--------------------------------------------------------------------------------
/tests/trainers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/trainers/callbacks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/trainers/lightning/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/trainers/test_device.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import unittest
3 |
4 | import torch
5 | from mmf.trainers.core.device import TrainerDeviceMixin
6 | from mmf.utils.general import get_current_device
7 | from omegaconf import OmegaConf
8 |
9 |
class DeviceMock(TrainerDeviceMixin):
    # Minimal trainer stand-in: supplies only the `config` attribute that the
    # TrainerDeviceMixin methods (configure_seed/configure_device) read.
    def __init__(self, config):
        self.config = config
13 |
14 |
class TestDevice(unittest.TestCase):
    def test_current_device(self):
        """configure_device should select cuda:0 when available, else CPU."""
        config = OmegaConf.create(
            {
                "training": {"seed": 1, "cudnn_benchmark": False},
                "distributed": {"init_method": None},
            }
        )
        mock_trainer = DeviceMock(config)
        mock_trainer.configure_seed()
        mock_trainer.configure_device()
        current = get_current_device()
        if torch.cuda.is_available():
            self.assertEqual(current, "cuda:0")
        else:
            self.assertEqual(current, torch.device(type="cpu"))
29 |
--------------------------------------------------------------------------------
/tests/trainers/test_eval_loop.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import unittest
4 | from unittest.mock import MagicMock, patch
5 |
6 | import torch
7 | from tests.trainers.test_utils import get_config_with_defaults, get_mmf_trainer
8 |
9 |
class TestEvalLoop(unittest.TestCase):
    def setUp(self):
        # Seed fixed so the golden loss/logit values below are deterministic.
        torch.manual_seed(2)

    # Decorators apply bottom-up: `a` receives the get_mmf_env mock and `b`
    # the PathManager mock. Both are stubbed so the test reporter does not
    # touch the filesystem.
    @patch(
        "mmf.common.test_reporter.PathManager",
        return_value=MagicMock(return_value=None),
    )
    @patch("mmf.common.test_reporter.get_mmf_env", return_value="")
    def test_eval_loop(self, a, b):
        config = get_config_with_defaults(
            {"training": {"max_updates": 2, "max_epochs": 2}}
        )
        trainer = get_mmf_trainer(config=config)
        combined_report, meter = trainer.evaluation_loop("val")
        # Golden values tied to the seed set in setUp; they pin the
        # deterministic output of the default test trainer on "val".
        self.assertAlmostEqual(combined_report["losses"]["loss"], 493377.5312)
        self.assertAlmostEqual(combined_report["logits"].item(), -0.2379742, 6)
27 |
--------------------------------------------------------------------------------
/tests/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tests/utils/test_distributed.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import unittest
3 |
4 | import mmf.utils.distributed as distributed
5 |
6 |
class TestUtilsDistributed(unittest.TestCase):
    def test_object_byte_tensor_conversion(self):
        """Round-tripping through a byte tensor must preserve the object."""
        original = [1, "2", {3: 4}, [5]]
        encoded = distributed.object_to_byte_tensor(original)
        decoded = distributed.byte_tensor_to_object(encoded)
        self.assertEqual(decoded, original)
13 |
--------------------------------------------------------------------------------
/tests/utils/test_patch.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import unittest
4 |
5 | from mmf.common.registry import registry
6 | from mmf.utils.patch import (
7 | ORIGINAL_PATCH_FUNCTIONS_KEY,
8 | restore_saved_modules,
9 | safecopy_modules,
10 | )
11 |
12 |
class TestClass:
    """Fixture holding one patchable static method for safecopy tests."""

    @staticmethod
    def test_function():
        # Sentinel: always truthy, so restoration can be asserted after
        # the method has been clobbered with a falsy lambda.
        return True
17 |
18 |
class TestUtilsPatch(unittest.TestCase):
    def setUp(self):
        # Begin each test with an empty saved-functions registry entry.
        registry.register(ORIGINAL_PATCH_FUNCTIONS_KEY, {})

    def test_safecopy_modules(self):
        """safecopy_modules records originals; restore_saved_modules re-applies them."""
        safecopy_modules(["TestClass.test_function"], {"TestClass": TestClass})
        saved = registry.get(ORIGINAL_PATCH_FUNCTIONS_KEY)
        self.assertIn("TestClass.test_function", saved)

        # Clobber the method, then ask the patch utilities to put it back.
        TestClass.test_function = lambda: False
        restore_saved_modules({"TestClass": TestClass})
        self.assertTrue(TestClass.test_function())
31 |
--------------------------------------------------------------------------------
/tests/utils/test_timer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import time
3 | import unittest
4 |
5 | from mmf.utils.timer import Timer
6 |
7 |
class TestUtilsTimer(unittest.TestCase):
    def test_get_current(self):
        # A freshly created timer should report ~0 ms elapsed.
        timer = Timer()
        self.assertEqual(int(timer.get_current().split("ms")[0]), 0)

    def test_reset(self):
        # Resetting after a delay should bring elapsed time back to ~0 ms.
        timer = Timer()
        time.sleep(2)
        timer.reset()
        self.assertEqual(int(timer.get_current().split("ms")[0]), 0)

    def test_get_time_since_start(self):
        # After sleeping two seconds, the whole-second reading should be 2.
        timer = Timer()
        time.sleep(2)
        self.assertEqual(2, int(timer.get_time_since_start().split("s")[0]))
29 |
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tools/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
--------------------------------------------------------------------------------
/tools/scripts/bert/extract_bert.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Usage: extract_bert.sh <imdb_path> <out_path> <n_groups>
# Launches <n_groups> parallel BERT-embedding extraction workers, one per
# group id, then waits for all of them to finish.
#
# Fixes: legacy backtick `expr` replaced with arithmetic expansion; positional
# args quoted so paths containing spaces work; added `wait` so the script does
# not exit while background workers are still running.
N_REM=$(( $3 - 1 ))

for i in $(seq 0 "$N_REM"); do
    python tools/scripts/bert/extract_bert_embeddings.py \
        --imdb_path "$1" --out_path "$2" --group_id "$i" --n_groups "$3" &
done

wait
7 |
--------------------------------------------------------------------------------
/tools/scripts/gqa/README.md:
--------------------------------------------------------------------------------
1 | # Conversion of GQA to VQA format
2 |
3 | * Download GQA datasets and store as format shown in conversion script
4 | * Download glove embeddings 300D file
5 | * Run the script from the root of the repo after changing the relevant paths:
6 |
7 | ```
8 | python tools/scripts/gqa/convert_gqa_to_vqa.py --gqa_dir <gqa_dir> --out_dir <out_dir>
9 | ```
10 |
--------------------------------------------------------------------------------
/tools/scripts/visual_dialog/extract_vocabulary.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import json
3 |
4 | from tools.scripts.gqa.extract_vocabulary import ExtractVocabulary
5 |
6 |
class ExtractVisdialVocabulary(ExtractVocabulary):
    """Vocabulary extractor for Visual Dialog (VisDial) annotation files."""

    def __init__(self):
        super().__init__()

    def get_text(self):
        """Return a flat list of strings gathered from every input JSON file:
        all questions, all answers, and each dialog's caption.

        NOTE(review): assumes each file follows the VisDial schema with
        data.questions / data.answers / data.dialogs[*].caption — confirm
        against the dataset release being processed.
        """
        text = []

        for input_file in self.input_files:
            with open(input_file) as f:
                f_json = json.load(f)
                # Add 'questions' from visdial
                text += f_json["data"]["questions"]
                # Add 'answers' from visdial
                text += f_json["data"]["answers"]

                for dialog in f_json["data"]["dialogs"]:
                    text += [dialog["caption"]]
        return text
25 |
26 |
if __name__ == "__main__":
    # Script entry point: run the VisDial vocabulary extraction
    # (argument handling lives in the ExtractVocabulary base class).
    extractor = ExtractVisdialVocabulary()
    extractor.extract()
30 |
--------------------------------------------------------------------------------
/tools/sweeps/README.md:
--------------------------------------------------------------------------------
1 | # Sweep Scripts
2 |
3 | See [https://mmf.sh/docs/tutorials/slurm](https://mmf.sh/docs/tutorials/slurm) for tutorial on how to use these scripts.
4 |
--------------------------------------------------------------------------------
/website/.eslintignore:
--------------------------------------------------------------------------------
1 | .docusaurus
2 | static/api
3 | build/
4 |
--------------------------------------------------------------------------------
/website/.gitignore:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | /node_modules
3 |
4 | # Production
5 | /build
6 |
7 | # Generated files
8 | .docusaurus
9 | .cache-loader
10 |
11 | # Misc
12 | .DS_Store
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 |
18 | npm-debug.log*
19 | yarn-debug.log*
20 | yarn-error.log*
21 |
22 | # ESLint
23 | .eslintcache
24 |
25 | # Static Docs
26 | static/api
27 |
--------------------------------------------------------------------------------
/website/.prettierignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | build
3 | .docusaurus
4 | static/api
5 |
--------------------------------------------------------------------------------
/website/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "arrowParens": "always",
3 | "bracketSpacing": false,
4 | "jsxBracketSameLine": true,
5 | "printWidth": 80,
6 | "proseWrap": "never",
7 | "singleQuote": true,
8 | "trailingComma": "all"
9 | }
10 |
--------------------------------------------------------------------------------
/website/.stylelintrc.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) Facebook, Inc. and its affiliates.
3 | *
4 | * This source code is licensed under the MIT license found in the
5 | * LICENSE file in the root directory of this source tree.
6 | */
7 |
// stylelint configuration: stylelint-copyright supplies the rule that
// enforces the license header comment on CSS sources.
module.exports = {
  plugins: ['stylelint-copyright'],
  rules: {
    'docusaurus/copyright-header': true,
  },
};
14 |
--------------------------------------------------------------------------------
/website/docs/getting_started/faqs.md:
--------------------------------------------------------------------------------
1 | ---
2 | id: faqs
3 | title: Frequently Asked Questions (FAQ)
4 | sidebar_label: FAQs
5 | ---
6 | ## Coming Soon!
7 |
--------------------------------------------------------------------------------
/website/docs/getting_started/video_overview.md:
--------------------------------------------------------------------------------
1 | ---
2 | id: video_overview
3 | title: Video overview
4 | sidebar_label: Video overview
5 | ---
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/website/src/pages/api_redirect/index.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) Facebook, Inc. and its affiliates.
3 | *
4 | * This source code is licensed under the MIT license found in the
5 | * LICENSE file in the root directory of this source tree.
6 | *
7 | * @format
8 | */
9 | import React from 'react';
10 | import BrowserOnly from '@docusaurus/BrowserOnly';
11 | import {useHistory} from 'react-router-dom';
12 |
13 | const API = () => {
14 | const history = useHistory();
15 | history.push('/');
16 | return (
17 | Some Fallback Content
}>
18 | {() => {
19 | window.location.href = '/api';
20 | }}
21 |
22 | );
23 | };
24 |
25 | export default API;
26 |
--------------------------------------------------------------------------------
/website/static/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | # This config file will prevent tests from being run on the gh-pages branch.
2 | version: 2
3 | jobs:
4 | build:
5 | machine: true
6 | branches:
7 | ignore: gh-pages
8 | steps:
9 |       - run: echo "Skipping tests on gh-pages branch"
10 |
--------------------------------------------------------------------------------
/website/static/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/.nojekyll
--------------------------------------------------------------------------------
/website/static/CNAME:
--------------------------------------------------------------------------------
1 | mmf.sh
2 |
--------------------------------------------------------------------------------
/website/static/img/boilerplate.svg:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/website/static/img/boilerplate_white.svg:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/website/static/img/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/favicon.png
--------------------------------------------------------------------------------
/website/static/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/logo.png
--------------------------------------------------------------------------------
/website/static/img/logo_white_f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/logo_white_f.png
--------------------------------------------------------------------------------
/website/static/img/oss_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/oss_logo.png
--------------------------------------------------------------------------------
/website/static/img/pytorch_logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
8 | pytorch_logo
9 |
11 |
13 |
14 |
--------------------------------------------------------------------------------
/website/static/img/pytorch_logo_white.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
8 | pytorch_logo
9 |
11 |
13 |
14 |
--------------------------------------------------------------------------------