├── .circleci └── config.yml ├── .editorconfig ├── .flake8 ├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── config.yml │ ├── feature-request.md │ ├── questions-help-support.md │ └── unexpected-problems.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── cpu_test.yaml │ ├── deploy_website.yaml │ └── linter_test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── NOTICES ├── PACKAGE ├── README.md ├── docs ├── .gitignore ├── Makefile ├── license_header.txt ├── requirements.txt └── source │ ├── _static │ ├── css │ │ └── customize.css │ ├── images │ │ ├── chevron-right-orange.svg │ │ ├── chevron_blue.svg │ │ ├── favicon.png │ │ ├── logo_icon.svg │ │ ├── mmf_logo.png │ │ ├── mmf_logo.svg │ │ ├── mmf_logo_white_f.svg │ │ ├── search_icon.svg │ │ └── view-page-source-icon.svg │ └── js │ │ ├── ga.js │ │ └── redirect.js │ ├── _templates │ ├── layout.html │ └── theme_variables.jinja │ ├── conf.py │ ├── index.rst │ └── lib │ ├── common │ ├── registry.rst │ └── sample.rst │ ├── datasets │ ├── base_dataset.rst │ ├── base_dataset_builder.rst │ └── processors.rst │ ├── models │ └── base_model.rst │ ├── modules │ ├── losses.rst │ └── metrics.rst │ └── utils │ └── text.rst ├── mmf ├── __init__.py ├── common │ ├── __init__.py │ ├── batch_collator.py │ ├── constants.py │ ├── dataset_loader.py │ ├── meter.py │ ├── registry.py │ ├── report.py │ ├── sample.py │ ├── test_reporter.py │ └── typings.py ├── configs │ ├── datasets │ │ ├── airstore │ │ │ └── defaults.yaml │ │ ├── charades │ │ │ └── defaults.yaml │ │ ├── clevr │ │ │ └── defaults.yaml │ │ ├── coco │ │ │ ├── defaults.yaml │ │ │ ├── detection.yaml │ │ │ ├── masked.yaml │ │ │ └── ocr_en.yaml │ │ ├── coco2017 │ │ │ └── masked.yaml │ │ ├── conceptual_captions │ │ │ ├── defaults.yaml │ │ │ ├── masked.yaml │ │ │ └── train_small.yaml │ │ ├── flickr30k │ │ │ └── masked.yaml │ │ ├── glue │ │ │ └── defaults.yaml │ │ ├── gqa │ │ │ ├── defaults.yaml │ │ │ └── masked.yaml │ │ ├── hateful_memes │ │ │ ├── bert.yaml │ │ │ ├── defaults.yaml │ │ │ ├── fine_grained │ │ │ │ ├── attack_vectors.yaml │ │ │ │ ├── defaults.yaml │ │ │ │ ├── hateful_pc_attack.yaml │ │ │ │ ├── pc_attack.yaml │ │ │ │ ├── protected_groups.yaml │ │ │ │ └── with_features.yaml │ │ │ └── with_features.yaml │ │ ├── localized_narratives │ │ │ └── masked.yaml │ │ ├── mmimdb │ │ │ ├── defaults.yaml │ │ │ ├── masked.yaml │ │ │ └── with_features.yaml │ │ ├── nlvr2 │ │ │ └── defaults.yaml │ │ ├── ocrvqa │ │ │ └── defaults.yaml │ │ ├── okvqa │ │ │ └── defaults.yaml │ │ ├── retrieval │ │ │ └── flickr30k_defaults.yaml │ │ ├── sbu_captions │ │ │ └── masked.yaml │ │ ├── stvqa │ │ │ └── defaults.yaml │ │ ├── textcaps │ │ │ └── defaults.yaml │ │ ├── textvqa │ │ │ ├── defaults.yaml │ │ │ └── with_resnet.yaml │ │ ├── vinvl │ │ │ └── defaults.yaml │ │ ├── visual_dialog │ │ │ └── defaults.yaml │ │ ├── visual_entailment │ │ │ └── defaults.yaml │ │ ├── visual_genome │ │ │ ├── defaults.yaml │ │ │ ├── detection.yaml │ │ │ └── masked.yaml │ │ ├── vizwiz │ │ │ └── defaults.yaml │ │ ├── vqa2 │ │ │ ├── defaults.yaml │ │ │ ├── masked.yaml │ │ │ ├── masked_q.yaml │ │ │ ├── train_val.yaml │ │ │ └── with_raw_images.yaml │ │ └── vqacp_v2 │ │ │ └── defaults.yaml │ ├── defaults.yaml │ ├── models │ │ ├── alignment │ │ │ └── defaults.yaml │ │ ├── ban │ │ │ └── defaults.yaml │ │ ├── butd │ │ │ └── defaults.yaml │ │ ├── cnn_lstm │ │ │ └── defaults.yaml │ │ ├── fusions │ │ │ ├── concat_bert.yaml │ │ │ ├── concat_bow.yaml │ │ │ ├── defaults.yaml │ │ │ └── 
late_fusion.yaml │ │ ├── krisp │ │ │ └── defaults.yaml │ │ ├── lorra │ │ │ └── defaults.yaml │ │ ├── lxmert │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ │ ├── m4c │ │ │ └── defaults.yaml │ │ ├── m4c_captioner │ │ │ └── defaults.yaml │ │ ├── mmbt │ │ │ ├── classification.yaml │ │ │ ├── defaults.yaml │ │ │ ├── pretrain.yaml │ │ │ └── with_features.yaml │ │ ├── mmf_bert │ │ │ └── defaults.yaml │ │ ├── mmf_transformer │ │ │ ├── defaults.yaml │ │ │ ├── pretrain.yaml │ │ │ └── with_audio_video.yaml │ │ ├── movie_mcan │ │ │ └── defaults.yaml │ │ ├── pythia │ │ │ └── defaults.yaml │ │ ├── unimodal │ │ │ ├── bert.yaml │ │ │ ├── image.yaml │ │ │ ├── text.yaml │ │ │ └── with_features.yaml │ │ ├── unit │ │ │ └── defaults.yaml │ │ ├── uniter │ │ │ └── defaults.yaml │ │ ├── vilbert │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ │ ├── vilt │ │ │ └── defaults.yaml │ │ ├── vinvl │ │ │ └── defaults.yaml │ │ └── visual_bert │ │ │ ├── classification.yaml │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ └── zoo │ │ ├── datasets.yaml │ │ └── models.yaml ├── datasets │ ├── __init__.py │ ├── base_dataset.py │ ├── base_dataset_builder.py │ ├── builders │ │ ├── __init__.py │ │ ├── airstore │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── charades │ │ │ ├── __init__.py │ │ │ ├── _utils.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── clevr │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── coco │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── detection_builder.py │ │ │ ├── detection_dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── coco2017 │ │ │ ├── __init__.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── conceptual_captions │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── flickr30k │ │ │ ├── __init__.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── glue │ │ │ ├── __init__.py │ │ │ └── builder.py │ │ ├── gqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── hateful_memes │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── localized_narratives │ │ │ ├── __init__.py │ │ │ ├── database.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── mmimdb │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── nlvr2 │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── ocrvqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── okvqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── database.py │ │ │ └── dataset.py │ │ ├── retrieval │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ └── datasets.py │ │ ├── sbu_captions │ │ │ ├── __init__.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── stvqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── textcaps │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── textvqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── vinvl │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── visual_dialog │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── database.py │ │ │ └── dataset.py │ │ ├── visual_entailment │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── visual_genome │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ 
├── dataset.py │ │ │ ├── detection_builder.py │ │ │ ├── detection_dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── vizwiz │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── vqa2 │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── masked_builder.py │ │ │ ├── masked_dataset.py │ │ │ ├── masked_q_vqa2_builder.py │ │ │ ├── masked_q_vqa2_dataset.py │ │ │ ├── ocr_builder.py │ │ │ └── ocr_dataset.py │ │ └── vqacp_v2 │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── database.py │ │ │ └── dataset.py │ ├── concat_dataset.py │ ├── databases │ │ ├── __init__.py │ │ ├── annotation_database.py │ │ ├── features_database.py │ │ ├── image_database.py │ │ ├── readers │ │ │ ├── __init__.py │ │ │ └── feature_readers.py │ │ └── scene_graph_database.py │ ├── iteration_strategies.py │ ├── lightning_multi_datamodule.py │ ├── lightning_multi_dataset_loader.py │ ├── mmf_dataset.py │ ├── mmf_dataset_builder.py │ ├── multi_datamodule.py │ ├── multi_dataset_loader.py │ ├── processors │ │ ├── __init__.py │ │ ├── bert_processors.py │ │ ├── detection_transforms.py │ │ ├── frcnn_processor.py │ │ ├── functional.py │ │ ├── image_processors.py │ │ ├── prediction_processors.py │ │ ├── processors.py │ │ └── video_processors.py │ └── subset_dataset.py ├── models │ ├── __init__.py │ ├── albef │ │ ├── __init__.py │ │ └── vit.py │ ├── alignment.py │ ├── ban.py │ ├── base_model.py │ ├── butd.py │ ├── cnn_lstm.py │ ├── frcnn.py │ ├── fusions.py │ ├── interfaces │ │ ├── __init__.py │ │ └── mmbt.py │ ├── krisp.py │ ├── lorra.py │ ├── lxmert.py │ ├── m4c.py │ ├── m4c_captioner.py │ ├── mmbt.py │ ├── mmf_bert.py │ ├── mmf_transformer.py │ ├── movie_mcan.py │ ├── pythia.py │ ├── top_down_bottom_up.py │ ├── transformers │ │ ├── __init__.py │ │ ├── backends │ │ │ ├── __init__.py │ │ │ └── huggingface.py │ │ ├── base.py │ │ └── heads │ │ │ ├── __init__.py │ │ │ ├── contrastive.py │ │ │ ├── itm.py │ │ │ ├── mlm.py │ │ │ ├── mlp.py │ │ │ ├── mrc.py │ │ │ ├── mrfr.py │ │ │ ├── refiner.py │ │ │ ├── refnet_classifier.py │ │ │ ├── utils.py │ │ │ └── wra.py │ ├── unimodal.py │ ├── unit │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── matcher.py │ │ ├── misc.py │ │ ├── transformer.py │ │ ├── unit.py │ │ └── unit_base_model.py │ ├── uniter.py │ ├── vilbert.py │ ├── vilt.py │ ├── vinvl.py │ ├── visdial_multi_modal.py │ └── visual_bert.py ├── modules │ ├── __init__.py │ ├── attention.py │ ├── bottleneck.py │ ├── decoders.py │ ├── embeddings.py │ ├── encoders.py │ ├── fusions.py │ ├── hf_layers.py │ ├── layers.py │ ├── losses.py │ ├── metrics.py │ ├── optimizers.py │ ├── ot.py │ ├── poolers.py │ ├── schedulers.py │ └── vit.py ├── projects ├── trainers │ ├── __init__.py │ ├── base_trainer.py │ ├── callbacks │ │ ├── __init__.py │ │ ├── base.py │ │ ├── checkpoint.py │ │ ├── early_stopping.py │ │ ├── logistics.py │ │ └── lr_scheduler.py │ ├── core │ │ ├── __init__.py │ │ ├── callback_hook.py │ │ ├── device.py │ │ ├── evaluation_loop.py │ │ ├── profiling.py │ │ └── training_loop.py │ ├── lightning_core │ │ ├── __init__.py │ │ ├── loop_callback.py │ │ ├── loop_callback_with_torchmetrics.py │ │ └── torchmetric.py │ ├── lightning_trainer.py │ └── mmf_trainer.py ├── utils │ ├── __init__.py │ ├── box_ops.py │ ├── build.py │ ├── checkpoint.py │ ├── checkpoint_updater.py │ ├── configuration.py │ ├── dataset.py │ ├── distributed.py │ ├── download.py │ ├── early_stopping.py │ ├── env.py │ ├── features │ │ ├── __init__.py │ │ └── visualizing_image.py │ ├── file_io.py │ ├── flags.py │ ├── 
general.py │ ├── inference.py │ ├── logger.py │ ├── m4c_evaluators.py │ ├── modeling.py │ ├── patch.py │ ├── phoc │ │ ├── __init__.py │ │ ├── build_phoc.py │ │ └── src │ │ │ └── cphoc.c │ ├── process_answers.py │ ├── text.py │ ├── timer.py │ ├── torchscript.py │ ├── transform.py │ ├── visualize.py │ ├── vocab.py │ └── xla.py └── version.py ├── mmf_cli ├── __init__.py ├── hm_convert.py ├── interactive.py ├── predict.py ├── run.py └── torchx_entryscript.py ├── projects ├── ban │ ├── README.md │ └── configs │ │ ├── textvqa │ │ └── defaults.yaml │ │ ├── vizwiz │ │ └── defaults.yaml │ │ └── vqa2 │ │ └── defaults.yaml ├── butd │ ├── README.md │ └── configs │ │ ├── coco │ │ ├── beam_search.yaml │ │ ├── defaults.yaml │ │ └── nucleus_sampling.yaml │ │ ├── conceptual_captions │ │ ├── beam_search.yaml │ │ ├── defaults.yaml │ │ └── nucleus_sampling.yaml │ │ └── textcaps │ │ ├── beam_search.yaml │ │ ├── defaults.yaml │ │ └── eval_pretrained_coco_model.yaml ├── hateful_memes │ ├── README.md │ ├── configs │ │ ├── concat_bert │ │ │ └── defaults.yaml │ │ ├── concat_bow │ │ │ └── defaults.yaml │ │ ├── late_fusion │ │ │ └── defaults.yaml │ │ ├── mmbt │ │ │ ├── defaults.yaml │ │ │ └── with_features.yaml │ │ ├── mmf_transformer │ │ │ └── defaults.yaml │ │ ├── unimodal │ │ │ ├── bert.yaml │ │ │ ├── image.yaml │ │ │ ├── text.yaml │ │ │ └── with_features.yaml │ │ ├── vilbert │ │ │ ├── defaults.yaml │ │ │ ├── direct.yaml │ │ │ └── from_cc.yaml │ │ └── visual_bert │ │ │ ├── defaults.yaml │ │ │ ├── direct.yaml │ │ │ └── from_coco.yaml │ └── fine_grained │ │ ├── README.md │ │ └── configs │ │ └── visual_bert │ │ ├── attack_vectors.yaml │ │ ├── defaults.yaml │ │ ├── hateful_pc_attack.yaml │ │ ├── multilabel.yaml │ │ ├── pc_attack.yaml │ │ └── protected_groups.yaml ├── krisp │ ├── README.md │ ├── configs │ │ ├── krisp │ │ │ ├── okvqa │ │ │ │ ├── conceptnet_only.yaml │ │ │ │ ├── dbpedia_only.yaml │ │ │ │ ├── defaults.yaml │ │ │ │ ├── haspart_only.yaml │ │ │ │ ├── okvqav10.yaml │ │ │ │ ├── okvqav10_fromfullpretrain.yaml │ │ │ │ ├── randomgraph.yaml │ │ │ │ ├── train_val.yaml │ │ │ │ ├── train_val_cnonly.yaml │ │ │ │ ├── train_val_dbonly.yaml │ │ │ │ ├── train_val_hponly.yaml │ │ │ │ ├── train_val_okvqav10.yaml │ │ │ │ ├── train_val_okvqav10_fromfullpretrain.yaml │ │ │ │ ├── train_val_random.yaml │ │ │ │ ├── train_val_vgonly.yaml │ │ │ │ └── visualgenome_only.yaml │ │ │ └── vqa2 │ │ │ │ └── krisp_pretrain.yaml │ │ └── visual_bert │ │ │ ├── masked_coco │ │ │ └── okvqa_safe.yaml │ │ │ ├── masked_vqa2 │ │ │ └── okvqa_safe.yaml │ │ │ ├── okvqa │ │ │ ├── defaults.yaml │ │ │ ├── defaults_v10.yaml │ │ │ ├── train_val.yaml │ │ │ └── train_val_okvqav10.yaml │ │ │ └── vqa2 │ │ │ └── defaults_okvqasafe.yaml │ ├── graphnetwork_module.py │ └── requirements.txt ├── lorra │ ├── README.md │ └── configs │ │ ├── textvqa │ │ └── defaults.yaml │ │ ├── vizwiz │ │ └── defaults.yaml │ │ └── vqa2 │ │ ├── defaults.yaml │ │ ├── train_val.yaml │ │ └── train_val_resnet_only.yaml ├── lxmert │ ├── README.md │ └── configs │ │ ├── coco │ │ ├── masked.yaml │ │ └── pretrain.yaml │ │ ├── defaults.yaml │ │ ├── gqa │ │ ├── masked.yaml │ │ └── pretrain.yaml │ │ ├── pretrain.yaml │ │ ├── visual_genome │ │ ├── masked.yaml │ │ └── pretrain.yaml │ │ └── vqa2 │ │ ├── defaults.yaml │ │ ├── masked.yaml │ │ └── pretrain.yaml ├── m4c │ ├── README.md │ ├── configs │ │ ├── ocrvqa │ │ │ └── defaults.yaml │ │ ├── stvqa │ │ │ └── defaults.yaml │ │ └── textvqa │ │ │ ├── defaults.yaml │ │ │ ├── joint_with_stvqa.yaml │ │ │ └── ocr_ml.yaml │ └── scripts │ │ ├── __init__.py │ │ 
└── extract_ocr_frcn_feature.py ├── m4c_captioner │ ├── README.md │ ├── configs │ │ ├── butd │ │ │ └── textcaps │ │ └── m4c_captioner │ │ │ ├── coco │ │ │ ├── defaults.yaml │ │ │ └── eval_on_textcaps.yaml │ │ │ └── textcaps │ │ │ ├── defaults.yaml │ │ │ ├── joint_with_coco.yaml │ │ │ ├── with_caffe2_feat.yaml │ │ │ └── without_ocr.yaml │ └── scripts │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ └── textcaps_eval.py ├── mmbt │ ├── README.md │ └── configs │ │ ├── hateful_memes │ │ ├── defaults.yaml │ │ ├── hateful_with_refiner.yaml │ │ └── with_features.yaml │ │ ├── masked_coco │ │ └── defaults.yaml │ │ ├── mmimdb │ │ ├── defaults.yaml │ │ ├── paper_ablations_reducedlabel.yaml │ │ └── with_features.yaml │ │ ├── okvqa │ │ └── with_images.yaml │ │ └── vqa2 │ │ └── with_raw_images.yaml ├── mmf_transformer │ ├── configs │ │ ├── airstore │ │ │ └── masked_coco.yaml │ │ ├── charades │ │ │ └── direct.yaml │ │ ├── hateful_memes │ │ │ ├── defaults.yaml │ │ │ └── hateful_with_refiner.yaml │ │ ├── masked_coco │ │ │ ├── defaults.yaml │ │ │ └── pretrain_itm.yaml │ │ ├── okvqa │ │ │ └── defaults.yaml │ │ └── vqa2 │ │ │ └── defaults.yaml │ └── localized_narratives │ │ └── masked.yaml ├── movie_mcan │ ├── README.md │ └── configs │ │ └── vqa2 │ │ ├── defaults.yaml │ │ └── e2e.yaml ├── others │ ├── cnn_lstm │ │ ├── clevr │ │ │ └── defaults.yaml │ │ └── hateful_memes │ │ │ └── defaults.yaml │ ├── concat_bert │ │ └── hateful_memes │ │ │ └── defaults.yaml │ ├── concat_bow │ │ └── hateful_memes │ │ │ └── defaults.yaml │ ├── late_fusion │ │ └── hateful_memes │ │ │ └── defaults.yaml │ ├── mmf_bert │ │ └── configs │ │ │ ├── masked_coco │ │ │ ├── defaults.yaml │ │ │ ├── pretrain.yaml │ │ │ └── pretrain_joint_vqa2.yaml │ │ │ ├── masked_conceptual_captions │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ │ │ ├── masked_vqa2 │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ │ │ ├── visual_entailment │ │ │ └── defaults.yaml │ │ │ ├── vizwiz │ │ │ └── defaults.yaml │ │ │ └── vqa2 │ │ │ └── defaults.yaml │ └── unimodal │ │ └── configs │ │ └── hateful_memes │ │ ├── bert.yaml │ │ ├── image.yaml │ │ ├── text.yaml │ │ └── with_features.yaml ├── pretrain_vl_right │ ├── README.md │ └── configs │ │ ├── vilbert │ │ ├── masked_coco │ │ │ ├── defaults.yaml │ │ │ ├── fifty_pc.yaml │ │ │ ├── full.yaml │ │ │ └── ten_pc.yaml │ │ ├── masked_conceptual_captions │ │ │ ├── defaults.yaml │ │ │ ├── full.yaml │ │ │ ├── full_coco_generated.yaml │ │ │ ├── half.yaml │ │ │ ├── half_coco_generated.yaml │ │ │ ├── small.yaml │ │ │ ├── small_coco_generated.yaml │ │ │ ├── small_fifty_pc.yaml │ │ │ └── small_ten_pc.yaml │ │ └── masked_vqa2 │ │ │ ├── defaults.yaml │ │ │ ├── fifty_pc.yaml │ │ │ ├── full.yaml │ │ │ └── ten_pc.yaml │ │ └── visual_bert │ │ ├── masked_coco │ │ ├── defaults.yaml │ │ ├── fifty_pc.yaml │ │ ├── full.yaml │ │ ├── full_train_val.yaml │ │ └── ten_pc.yaml │ │ ├── masked_conceptual_captions │ │ ├── defaults.yaml │ │ ├── full.yaml │ │ ├── full_coco_generated.yaml │ │ ├── half.yaml │ │ ├── half_coco_generated.yaml │ │ ├── small.yaml │ │ ├── small_coco_generated.yaml │ │ ├── small_fifty_pc.yaml │ │ └── small_ten_pc.yaml │ │ └── masked_vqa2 │ │ ├── defaults.yaml │ │ ├── fifty_pc.yaml │ │ ├── full.yaml │ │ ├── full_train_val.yaml │ │ └── ten_pc.yaml ├── pythia │ ├── README.md │ └── configs │ │ ├── masked_q_vqa2 │ │ └── defaults.yaml │ │ ├── multihead │ │ └── defaults.yaml │ │ ├── textvqa │ │ └── defaults.yaml │ │ ├── visual_genome │ │ └── defaults.yaml │ │ ├── vizwiz │ │ └── defaults.yaml │ │ └── vqa2 │ │ ├── 
12k_iterations_without_resnet.yaml │ │ ├── debug.yaml │ │ ├── defaults.yaml │ │ ├── resnet_only.yaml │ │ ├── train_val.yaml │ │ └── train_val_resnet_only.yaml ├── unit │ ├── README.md │ └── configs │ │ ├── all_8_datasets │ │ ├── separate_dec.yaml │ │ ├── shared_dec.yaml │ │ └── shared_dec_without_task_embedding.yaml │ │ ├── coco │ │ ├── single_task.yaml │ │ └── single_task_without_task_embedding.yaml │ │ ├── coco_vg_vqa2 │ │ ├── separate_dec.yaml │ │ └── shared_dec.yaml │ │ ├── coco_vqa2 │ │ ├── separate_dec.yaml │ │ └── shared_dec.yaml │ │ ├── vg │ │ └── single_task.yaml │ │ ├── vg_vqa2 │ │ ├── separate_dec.yaml │ │ └── shared_dec.yaml │ │ ├── visual_entailment_dataset_cfg.yaml │ │ ├── vqa2 │ │ └── single_task.yaml │ │ └── vqa2_dataset_cfg.yaml ├── uniter │ ├── README.md │ └── configs │ │ ├── masked_coco │ │ └── defaults.yaml │ │ └── vqa2 │ │ └── defaults.yaml ├── vilbert │ ├── README.md │ └── configs │ │ ├── hateful_memes │ │ ├── defaults.yaml │ │ ├── direct.yaml │ │ └── from_cc.yaml │ │ ├── masked_coco │ │ ├── defaults.yaml │ │ ├── pretrain.yaml │ │ └── pretrain_train_val.yaml │ │ ├── masked_conceptual_captions │ │ ├── defaults.yaml │ │ └── pretrain.yaml │ │ ├── masked_vqa2 │ │ ├── defaults.yaml │ │ ├── pretrain.yaml │ │ └── pretrain_train_val.yaml │ │ ├── mmimdb │ │ ├── defaults.yaml │ │ └── pretrain.yaml │ │ ├── nlvr2 │ │ └── defaults.yaml │ │ ├── visual_entailment │ │ └── defaults.yaml │ │ ├── vizwiz │ │ └── defaults.yaml │ │ └── vqa2 │ │ ├── defaults.yaml │ │ └── train_val.yaml ├── vilt │ ├── README.md │ └── configs │ │ ├── masked_coco │ │ ├── defaults.yaml │ │ └── pretrain.yaml │ │ └── vqa2 │ │ ├── defaults.yaml │ │ ├── vit_b16_224.yaml │ │ └── vit_b32_384.yaml ├── vinvl │ ├── README.md │ └── configs │ │ └── vqa2 │ │ └── defaults.yaml └── visual_bert │ ├── README.md │ └── configs │ ├── gqa │ └── defaults.yaml │ ├── hateful_memes │ ├── defaults.yaml │ ├── direct.yaml │ └── from_coco.yaml │ ├── localized_narratives │ ├── defaults.yaml │ └── pretrain.yaml │ ├── masked_coco │ ├── defaults.yaml │ ├── pretrain.yaml │ └── pretrain_train_val.yaml │ ├── masked_conceptual_captions │ ├── defaults.yaml │ └── pretrain.yaml │ ├── masked_gqa │ └── defaults.yaml │ ├── masked_sbu │ ├── defaults.yaml │ └── pretrain.yaml │ ├── masked_vqa2 │ ├── defaults.yaml │ ├── pretrain.yaml │ └── pretrain_train_val.yaml │ ├── mmimdb │ ├── defaults.yaml │ └── pretrain.yaml │ ├── nlvr2 │ └── defaults.yaml │ ├── visual_entailment │ ├── defaults.yaml │ └── train_val.yaml │ ├── vizwiz │ ├── defaults.yaml │ └── train_val.yaml │ └── vqa2 │ ├── defaults.yaml │ ├── train_val.yaml │ └── with_raw_images.yaml ├── pyproject.toml ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── common │ ├── __init__.py │ ├── test_batch_collator.py │ ├── test_meter.py │ ├── test_report.py │ └── test_sample.py ├── configs │ ├── __init__.py │ ├── test_configs_for_keys.py │ └── test_zoo_urls.py ├── conftest.py ├── data │ ├── user_dir │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── always_one.yaml │ │ │ ├── experiment.yaml │ │ │ └── simple.yaml │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── always_one.py │ │ └── models │ │ │ ├── __init__.py │ │ │ └── simple.py │ └── vocab.txt ├── datasets │ ├── __init__.py │ ├── test_base_dataset.py │ ├── test_bert_processors.py │ ├── test_iteration_strategies.py │ ├── test_mmf_dataset_builder.py │ ├── test_multi_datamodule.py │ ├── test_multi_dataset_loader.py │ ├── test_prediction_processors.py │ └── test_processors.py ├── models │ ├── __init__.py │ ├── interfaces │ │ ├── __init__.py │ │ └── 
test_interfaces.py │ ├── test_albef.py │ ├── test_cnn_lstm.py │ ├── test_mmbt.py │ ├── test_mmf_transformer.py │ ├── test_uniter.py │ ├── test_vilbert.py │ ├── test_vilt.py │ ├── test_vinvl.py │ ├── test_visual_bert.py │ └── transformers │ │ ├── __init__.py │ │ ├── test_heads.py │ │ └── test_heads_dict.py ├── modules │ ├── __init__.py │ ├── test_encoders.py │ ├── test_fusions.py │ ├── test_hf_layers.py │ ├── test_layers.py │ ├── test_losses.py │ ├── test_metrics.py │ ├── test_optimizers.py │ ├── test_poolers.py │ └── test_vit.py ├── test_utils.py ├── trainers │ ├── __init__.py │ ├── callbacks │ │ ├── __init__.py │ │ ├── test_logistics.py │ │ ├── test_lr_scheduler.py │ │ └── test_user_callback.py │ ├── lightning │ │ ├── __init__.py │ │ ├── lightning_trainer_mock.py │ │ ├── test_checkpoint.py │ │ ├── test_grad_accumulate.py │ │ ├── test_grad_clipping.py │ │ ├── test_logging.py │ │ ├── test_loop_conditions.py │ │ ├── test_loss.py │ │ ├── test_lr_schedule.py │ │ └── test_validation.py │ ├── test_device.py │ ├── test_eval_loop.py │ ├── test_fp16.py │ ├── test_sharded_ddp.py │ ├── test_trainer_mocks.py │ ├── test_training_loop.py │ └── test_utils.py └── utils │ ├── __init__.py │ ├── test_checkpoint.py │ ├── test_configuration.py │ ├── test_distributed.py │ ├── test_download.py │ ├── test_env.py │ ├── test_file_io.py │ ├── test_general.py │ ├── test_logger.py │ ├── test_model.py │ ├── test_patch.py │ ├── test_quality_checks.py │ ├── test_text.py │ ├── test_timer.py │ └── test_visualize.py ├── tools ├── __init__.py ├── scripts │ ├── __init__.py │ ├── bert │ │ ├── extract_bert.sh │ │ └── extract_bert_embeddings.py │ ├── coco │ │ └── coco_caption_eval.py │ ├── features │ │ ├── extract_features_vinvl.py │ │ ├── extract_features_vmb.py │ │ ├── extract_resnet152_feat.py │ │ ├── extraction_utils.py │ │ ├── frcnn │ │ │ ├── extract_features_frcnn.py │ │ │ ├── frcnn_utils.py │ │ │ ├── modeling_frcnn.py │ │ │ └── processing_image.py │ │ └── lmdb_conversion.py │ ├── gqa │ │ ├── README.md │ │ ├── convert_gqa_to_vqa.py │ │ └── extract_vocabulary.py │ ├── tests │ │ └── generate_test_data.py │ └── visual_dialog │ │ ├── build_imdb.py │ │ └── extract_vocabulary.py └── sweeps │ ├── README.md │ ├── lib │ ├── __init__.py │ └── slurm.py │ └── sweep_visual_bert.py └── website ├── .eslintignore ├── .eslintrc.js ├── .gitignore ├── .prettierignore ├── .prettierrc ├── .stylelintrc.js ├── README.md ├── build_docs.sh ├── docs ├── challenges │ ├── hateful_memes_challenge.md │ ├── textvqa_challenge.md │ └── vqa_challenge.md ├── getting_started │ ├── faqs.md │ ├── features.md │ ├── installation.mdx │ ├── quickstart.md │ └── video_overview.md ├── notes │ ├── concepts.md │ ├── configuration.md │ ├── dataset_zoo.md │ ├── logging.md │ ├── model_zoo.md │ ├── pretrained_models.md │ ├── projects.md │ └── training_tricks.md ├── projects │ ├── butd.md │ ├── m4c.md │ ├── m4c_captioner.md │ ├── movie_mcan.md │ ├── unit.md │ ├── uniter.md │ ├── vilt.md │ └── vinvl.md └── tutorials │ ├── checkpointing.md │ ├── concat_bert_tutorial.md │ ├── dataset.md │ ├── image_feature_extraction.md │ ├── image_feature_extraction_vinvl.md │ ├── losses.md │ ├── metrics.md │ ├── processors.md │ ├── pytorchvideo.md │ └── slurm.md ├── docusaurus.config.js ├── package.json ├── sidebars.js ├── src ├── css │ └── custom.css └── pages │ ├── api_redirect │ └── index.js │ ├── index.js │ └── styles.module.css ├── static ├── .circleci │ └── config.yml ├── .nojekyll ├── CNAME └── img │ ├── banner_logo.svg │ ├── boilerplate.svg │ ├── boilerplate_white.svg │ ├── 
favicon.png │ ├── logo.png │ ├── logo.svg │ ├── logo_white_f.png │ ├── logo_white_f.svg │ ├── logo_white_text.svg │ ├── oss_logo.png │ ├── puzzle_pieces.svg │ ├── puzzle_pieces_white.svg │ ├── pytorch_logo.svg │ ├── pytorch_logo_white.svg │ ├── undraw_docusaurus_react.svg │ └── undraw_docusaurus_tree.svg └── yarn.lock

/.editorconfig:
--------------------------------------------------------------------------------
root = true

[*.py]
charset = utf-8
trim_trailing_whitespace = true
end_of_line = lf
insert_final_newline = true
indent_style = space
indent_size = 4

[*.md]
trim_trailing_whitespace = false

--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
# This is an example .flake8 config used when developing *Black* itself.

[flake8]
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4,B9
ignore = E203, E266, C901, C408, W503

--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Code of Conduct

Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
# require an issue template to be chosen
blank_issues_enabled: false

contact_links:
  - name: MMF Documentation
    url: https://mmf.sh/docs
    about: Check if your issue is already answered in the docs

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
---
name: "\U0001F680Feature Request"
about: Submit a proposal/request for a new MMF feature

---

## 🚀 Feature

## Motivation

## Pitch

## Alternatives

## Additional context

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/questions-help-support.md:
--------------------------------------------------------------------------------
---
name: "❓Questions/Help/Support"
about: Do you need support?

---

## ❓ Questions and Help

--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
Thanks for your contribution!

If you're sending a large PR (e.g., >50 lines), please open an issue first about
the feature/bug, and indicate how you want to contribute.

Follow the [contributing guidelines](https://github.com/facebookresearch/mmf/tree/main/.github/CONTRIBUTING.md) before opening the PR so that it matches MMF style guidelines.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.log
*.err
*.pyc
*.swp
.idea/*
**/__pycache__/*
**/output/*
data/.DS_Store
docs/build
results/*
build
dist
boards/*
*.egg-info/
checkpoint
*.pth
*.ckpt
*_cache
.cache
data
save
*.eggs
.eggs
eggs/
*.egg
.DS_Store
.vscode
.vscode/*
*.so
*-checkpoint.ipynb
!tests/data

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include requirements.txt
include LICENSE
include NOTICES
recursive-include mmf/configs/ *.yaml
recursive-include projects/ *.yaml

--------------------------------------------------------------------------------
/PACKAGE:
--------------------------------------------------------------------------------
load("@fbcode_macros//build_defs/lib:third_party.bzl", "third_party")

third_party.gen_overrides({"pypi/transformers": "3.4.0-transitional"})

--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
# Needed to ignore pytorch_sphinx_theme requirement clone
src

--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = mmf
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

--------------------------------------------------------------------------------
/docs/license_header.txt:
--------------------------------------------------------------------------------
Copyright (c) Facebook, Inc. and its affiliates.

--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
recommonmark==0.5.0
sphinx
sphinx_rtd_theme==0.4.3
sphinxcontrib-programoutput==0.16
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme

--------------------------------------------------------------------------------
/docs/source/_static/images/chevron-right-orange.svg:
--------------------------------------------------------------------------------
[SVG icon: markup not preserved in this text dump; only the stray text nodes "Page 1" and "Created with Sketch." survived]
--------------------------------------------------------------------------------
/docs/source/_static/images/chevron_blue.svg:
--------------------------------------------------------------------------------
[SVG icon: markup not preserved in this text dump; only the stray text nodes "Page 1" and "Created with Sketch." survived]

--------------------------------------------------------------------------------
/docs/source/_static/images/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/docs/source/_static/images/favicon.png

--------------------------------------------------------------------------------
/docs/source/_static/images/mmf_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/docs/source/_static/images/mmf_logo.png

--------------------------------------------------------------------------------
/docs/source/_static/images/view-page-source-icon.svg:
--------------------------------------------------------------------------------
[SVG icon: markup not preserved in this text dump]

--------------------------------------------------------------------------------
/docs/source/_static/js/ga.js:
--------------------------------------------------------------------------------
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());

gtag('config', 'UA-135079836-3');

--------------------------------------------------------------------------------
/docs/source/_static/js/redirect.js:
--------------------------------------------------------------------------------
// Redirect for older pythia documentation
(function(l) {
  if (window.location.href.indexOf('readthedocs') !== -1) {
    window.location.href = "https://mmf.sh/api";
  }
}(window.location));

--------------------------------------------------------------------------------
/docs/source/_templates/theme_variables.jinja:
--------------------------------------------------------------------------------
{%-
set external_urls = {
  'github': 'https://github.com/facebookresearch/mmf',
  'github_issues': 'https://github.com/facebookresearch/mmf/issues',
  'contributing': 'https://github.com/facebookresearch/mmf/blob/main/CONTRIBUTING.md',
  'api': 'https://mmf.sh/api',
  'docs': 'https://mmf.sh/docs',
  'previous_pytorch_versions': 'https://mmf.sh/previous-versions/',
  'home': 'https://mmf.sh/',
  'get_started': 'https://mmf.sh/docs',
  'features': 'https://mmf.sh/docs/getting_started/features',
  'brand_guidelines': 'https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf'
}
-%}
{%-
set og = {
  'description': 'API docs for MMF. MMF is a modular framework powered by PyTorch for multimodal vision and language research from Facebook AI Research'
}
-%}

--------------------------------------------------------------------------------
/docs/source/lib/common/registry.rst:
--------------------------------------------------------------------------------
common.registry
===============

.. automodule:: mmf.common.registry
    :members:
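
The registry documented above is MMF's central hub: models, datasets, processors, and metrics register themselves into it under string keys and are looked up by those keys at build time. As a quick orientation, a minimal usage sketch follows; "MyModel" and the "my_model" key are placeholders, not names from the repo.

from mmf.common.registry import registry
from mmf.models.base_model import BaseModel

@registry.register_model("my_model")  # "my_model" is a placeholder key
class MyModel(BaseModel):
    def __init__(self, config):
        super().__init__(config)

# Later lookups resolve through the same registry:
model_cls = registry.get_model_class("my_model")
assert model_cls is MyModel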
--------------------------------------------------------------------------------
/docs/source/lib/common/sample.rst:
--------------------------------------------------------------------------------
common.sample
===============

.. automodule:: mmf.common.sample
    :members:
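
Sample and SampleList, documented above, are the containers that every dataset below collates into. A short sketch of the documented behavior (the tensor values are made up for illustration):

import torch
from mmf.common.sample import Sample, SampleList

s1 = Sample({"text": torch.tensor([1, 2, 3])})
s2 = Sample({"text": torch.tensor([4, 5, 6])})

batch = SampleList([s1, s2])   # collates field-wise across samples
print(batch.text.shape)        # torch.Size([2, 3])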
--------------------------------------------------------------------------------
/docs/source/lib/datasets/base_dataset.rst:
--------------------------------------------------------------------------------
datasets.base_dataset
=====================

.. automodule:: mmf.datasets.base_dataset
    :members:
    :private-members:

--------------------------------------------------------------------------------
/docs/source/lib/datasets/base_dataset_builder.rst:
--------------------------------------------------------------------------------
datasets.base_dataset_builder
=============================

.. automodule:: mmf.datasets.base_dataset_builder
    :members:
    :private-members:

--------------------------------------------------------------------------------
/docs/source/lib/datasets/processors.rst:
--------------------------------------------------------------------------------
datasets.processors
===================

.. automodule:: mmf.datasets.processors.processors
    :members:
    :private-members:

.. automodule:: mmf.datasets.processors.image_processors
    :members:
    :private-members:

.. automodule:: mmf.datasets.processors.bert_processors
    :members:
    :private-members:

--------------------------------------------------------------------------------
/docs/source/lib/models/base_model.rst:
--------------------------------------------------------------------------------
models.base_model
=================

.. automodule:: mmf.models.base_model
    :members:

--------------------------------------------------------------------------------
/docs/source/lib/modules/losses.rst:
--------------------------------------------------------------------------------
modules.losses
===============

.. automodule:: mmf.modules.losses
    :members:

--------------------------------------------------------------------------------
/docs/source/lib/modules/metrics.rst:
--------------------------------------------------------------------------------
modules.metrics
===============

.. automodule:: mmf.modules.metrics
    :members:

--------------------------------------------------------------------------------
/docs/source/lib/utils/text.rst:
--------------------------------------------------------------------------------
utils.text
===============

.. automodule:: mmf.utils.text
    :members:

--------------------------------------------------------------------------------
/mmf/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
# isort:skip_file
# flake8: noqa: F401
from mmf.utils.patch import patch_transformers

patch_transformers()

from mmf import common, datasets, models, modules, utils
from mmf.modules import losses, metrics, optimizers, poolers, schedulers
from mmf.version import __version__


__all__ = [
    "utils",
    "common",
    "modules",
    "datasets",
    "models",
    "losses",
    "poolers",
    "schedulers",
    "optimizers",
    "metrics",
]

--------------------------------------------------------------------------------
/mmf/common/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
from .meter import Meter
from .registry import registry
from .sample import Sample, SampleList


__all__ = ["Sample", "SampleList", "Meter", "registry"]

--------------------------------------------------------------------------------
/mmf/common/batch_collator.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
from mmf.common.sample import convert_batch_to_sample_list


class BatchCollator:
    def __init__(self, dataset_name, dataset_type):
        self._dataset_name = dataset_name
        self._dataset_type = dataset_type

    def __call__(self, batch):
        sample_list = convert_batch_to_sample_list(batch)
        sample_list.dataset_name = self._dataset_name
        sample_list.dataset_type = self._dataset_type
        return sample_list
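
BatchCollator above is shaped like a standard PyTorch collate_fn, so a plausible wiring looks like the sketch below; my_dataset is a stand-in for any dataset yielding Sample objects and is not defined anywhere in this dump.

from torch.utils.data import DataLoader

collator = BatchCollator(dataset_name="vqa2", dataset_type="train")
loader = DataLoader(my_dataset, batch_size=32, collate_fn=collator)

for sample_list in loader:
    # Each batch arrives as a SampleList tagged with its origin.
    print(sample_list.dataset_name, sample_list.dataset_type)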
--------------------------------------------------------------------------------
/mmf/common/typings.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class PerSetAttributeType:
    train: List[str]
    val: List[str]
    test: List[str]


@dataclass
class ProcessorConfigType:
    type: str
    params: Dict[str, Any]


@dataclass
class MMFDatasetConfigType:
    data_dir: str
    use_images: bool
    use_features: bool
    zoo_requirements: List[str]
    images: PerSetAttributeType
    features: PerSetAttributeType
    annotations: PerSetAttributeType
    processors: Dict[str, ProcessorConfigType]

--------------------------------------------------------------------------------
/mmf/configs/datasets/clevr/defaults.yaml:
--------------------------------------------------------------------------------
dataset_config:
  clevr:
    data_dir: ${env.data_dir}
    data_folder: CLEVR_v1.0
    build_attributes:
      min_count: 1
      split_regex: " "
      keep:
      - ";"
      - ","
      remove:
      - "?"
      - "."
    processors:
      text_processor:
        type: vocab
        params:
          max_length: 10
          vocab:
            type: random
            vocab_file: vocabs/clevr_question_vocab.txt
          preprocessor:
            type: simple_sentence
            params: {}
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          # Vocab file is relative to [data_dir]/[data_folder]
          vocab_file: vocabs/clevr_answer_vocab.txt
          preprocessor:
            type: simple_word
            params: {}

--------------------------------------------------------------------------------
/mmf/configs/datasets/conceptual_captions/train_small.yaml:
--------------------------------------------------------------------------------
dataset_config:
  conceptual_captions:
    annotations:
      train:
      - cc/defaults/annotations/train_small.npy

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/bert.yaml:
--------------------------------------------------------------------------------
dataset_config:
  hateful_memes:
    processors:
      text_processor:
        type: bert_tokenizer
        params:
          tokenizer_config:
            type: bert-base-uncased
            params:
              do_lower_case: true
          mask_probability: 0
          max_seq_length: 128

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/attack_vectors.yaml:
--------------------------------------------------------------------------------
includes:
- ./with_features.yaml

dataset_config:
  hateful_memes:
    fg_dataset_type: attack
    is_multilabel: true
    processors:
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          vocab_file: hateful_memes/fine_grained/labels/attack_vocab.txt
          preprocessor:
            type: simple_word
            params: {}

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/defaults.yaml:
--------------------------------------------------------------------------------
dataset_config:
  hateful_memes:
    zoo_requirements:
    - hateful_memes.defaults
    - hateful_memes.fine_grained
    annotations:
      train:
      - hateful_memes/fine_grained/annotations/train_fg.jsonl
      val:
      - hateful_memes/fine_grained/annotations/dev_seen_fg.jsonl
      test:
      - hateful_memes/fine_grained/annotations/test_seen_fg.jsonl

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/hateful_pc_attack.yaml:
--------------------------------------------------------------------------------
includes:
- ./with_features.yaml

dataset_config:
  hateful_memes:
    fg_dataset_type: hateful_pc_attack
    is_multilabel: true
    processors:
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          vocab_file: hateful_memes/fine_grained/labels/hateful_pc_attack_vocab.txt
          preprocessor:
            type: simple_word
            params: {}

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/pc_attack.yaml:
--------------------------------------------------------------------------------
includes:
- ./with_features.yaml

dataset_config:
  hateful_memes:
    fg_dataset_type: pc_attack
    is_multilabel: true
    processors:
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          vocab_file: hateful_memes/fine_grained/labels/pc_attack_vocab.txt
          preprocessor:
            type: simple_word
            params: {}
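
The fine-grained configs above all follow the same composition pattern: "includes:" pulls in a base file, and the keys beneath it override the merged result. MMF builds its configuration system on OmegaConf, so the semantics are roughly the sketch below; this is a simplification (MMF's actual loader resolves the includes: key and relative paths itself), and the two file names are simply the ones above.

from omegaconf import OmegaConf

base = OmegaConf.load("with_features.yaml")
override = OmegaConf.load("pc_attack.yaml")

# Later configs win on conflicting keys, i.e. include-then-override.
config = OmegaConf.merge(base, override)
print(config.dataset_config.hateful_memes.fg_dataset_type)  # pc_attack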
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/protected_groups.yaml:
--------------------------------------------------------------------------------
includes:
- ./with_features.yaml

dataset_config:
  hateful_memes:
    fg_dataset_type: pc
    is_multilabel: true
    processors:
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          vocab_file: hateful_memes/fine_grained/labels/pc_vocab.txt
          preprocessor:
            type: simple_word
            params: {}

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/with_features.yaml:
--------------------------------------------------------------------------------
includes:
- ./defaults.yaml

dataset_config:
  hateful_memes:
    use_images: false
    use_features: true
    # Disable this in your config if you do not need features info
    # and are running out of memory
    return_features_info: true

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/with_features.yaml:
--------------------------------------------------------------------------------
dataset_config:
  hateful_memes:
    use_images: false
    use_features: true
    # Disable this in your config if you do not need features info
    # and are running out of memory
    return_features_info: true

--------------------------------------------------------------------------------
/mmf/configs/datasets/mmimdb/with_features.yaml:
--------------------------------------------------------------------------------
dataset_config:
  mmimdb:
    use_images: false
    use_features: true
    # Disable this in your config if you do not need features info
    # and are running out of memory
    return_features_info: false

--------------------------------------------------------------------------------
/mmf/configs/datasets/textvqa/with_resnet.yaml:
--------------------------------------------------------------------------------
dataset_config:
  textvqa:
    features:
      train:
      - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/defaults/features/open_images/resnet152.lmdb
      val:
      - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/defaults/features/open_images/resnet152.lmdb
      test:
      - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/defaults/features/open_images/resnet152.lmdb

--------------------------------------------------------------------------------
/mmf/configs/datasets/vinvl/defaults.yaml:
--------------------------------------------------------------------------------
includes:
- ../vqa2/defaults.yaml

dataset_config:
  vinvl:
    base_dataset_name: vqa2
    label_map: /private/home/ryanjiang/winoground/pretrained_models/VG-SGG-dicts-vgoi6-clipped.json
    base_dataset: ${dataset_config.vqa2}
    processors:
      text_processor:
        type: vinvl_text_tokenizer
        params:
          mask_probability: 0

--------------------------------------------------------------------------------
/mmf/configs/datasets/vqa2/with_raw_images.yaml:
--------------------------------------------------------------------------------
dataset_config:
  vqa2:
    use_images: true
    use_features: false
    processors:
      image_processor:
        type: torchvision_transforms
        params:
          transforms:
          - type: Resize
            params:
              size: [256, 256]
          - type: CenterCrop
            params:
              size: [224, 224]
          - ToTensor
          - GrayScaleTo3Channels
          - type: Normalize
            params:
              mean: [0.46777044, 0.44531429, 0.40661017]
              std: [0.12221994, 0.12145835, 0.14380469]
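
The torchvision_transforms processor above composes standard torchvision transforms by name, in order. An approximate plain-torchvision equivalent for reference; GrayScaleTo3Channels is MMF-specific, so it is stubbed with a Lambda here, and the sizes and statistics are copied from the config:

import torchvision.transforms as T

transform = T.Compose([
    T.Resize([256, 256]),
    T.CenterCrop([224, 224]),
    T.ToTensor(),
    # Stand-in for MMF's GrayScaleTo3Channels: repeat 1-channel tensors to 3.
    T.Lambda(lambda x: x.expand(3, -1, -1) if x.size(0) == 1 else x),
    T.Normalize(mean=[0.46777044, 0.44531429, 0.40661017],
                std=[0.12221994, 0.12145835, 0.14380469]),
])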
--------------------------------------------------------------------------------
/mmf/configs/models/ban/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  ban:
    losses:
    - type: logit_bce
    text_embedding:
      num_hidden: 1280
      vocab_size: 1280
      emb_size: 300
      num_layers: 1
      dropout: 0.0
      bidirectional: False
      rnn_type: 'GRU'
    bilinear_attention:
      bc_net:
        k: 1
        dropout: [0.2, 0.5]
        h_out:
      fc_net:
        dims: 600
        activation:
        dropout: 0.2
      gamma: 4
      visual_feat_dim: 2048
    classifier:
      # out dim will be taken from registry as set by dataset builder
      hidden_size: 600
      dropout: 0.5

--------------------------------------------------------------------------------
/mmf/configs/models/butd/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  butd: &butd
    model_data_dir: ${env.data_dir}
    losses:
    - type: caption_cross_entropy
    classifier:
      type: language_decoder
      params:
        dropout: 0.5
        hidden_dim: 1024
        feature_dim: 2048
        fc_bias_init: 0
    image_feature_embeddings:
    - modal_combine:
        type: top_down_attention_lstm
        params:
          dropout: 0.5
          hidden_dim: 1024
          attention_dim: 1024
      normalization: softmax
      transform:
        type: linear
        params:
          out_dim: 1
    image_feature_dim: 2048
    embedding_dim: 300
    image_feature_encodings:
    - type: finetune_faster_rcnn_fpn_fc7
      params:
        bias_file: models/detectron.defaults/fc7_b.pkl
        weights_file: models/detectron.defaults/fc7_w.pkl
        model_data_dir: ${model_config.butd.model_data_dir}
    inference:
      type: greedy

--------------------------------------------------------------------------------
/mmf/configs/models/cnn_lstm/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  cnn_lstm:
    losses:
    - type: logit_bce
    text_embedding:
      embedding_dim: 20
    lstm:
      input_size: 20
      hidden_size: 50
      bidirectional: true
      batch_first: true
    cnn:
      layers:
        input_dims: [3, 64, 128, 128, 64, 64]
        output_dims: [64, 128, 128, 64, 64, 10]
        kernel_sizes: [7, 5, 5, 5, 5, 1]
    classifier:
      type: mlp
      params:
        in_dim: 450
        out_dim: 2
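
The cnn.layers block above encodes a convolution stack as three parallel lists. Purely as an illustration of how such parallel lists zip into modules (this is not MMF's actual builder code, and the activation choice is arbitrary):

import torch.nn as nn

input_dims = [3, 64, 128, 128, 64, 64]
output_dims = [64, 128, 128, 64, 64, 10]
kernel_sizes = [7, 5, 5, 5, 5, 1]

layers = []
for in_c, out_c, k in zip(input_dims, output_dims, kernel_sizes):
    # One conv per (input_dim, output_dim, kernel_size) triple.
    layers += [nn.Conv2d(in_c, out_c, kernel_size=k), nn.ReLU()]
cnn = nn.Sequential(*layers)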
--------------------------------------------------------------------------------
/mmf/configs/models/fusions/defaults.yaml:
--------------------------------------------------------------------------------
includes:
- ./concat_bert.yaml

--------------------------------------------------------------------------------
/mmf/configs/models/lxmert/pretrain.yaml:
--------------------------------------------------------------------------------
includes:
- configs/models/lxmert/defaults.yaml

--------------------------------------------------------------------------------
/mmf/configs/models/m4c/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  m4c:
    lr_scale_frcn: 0.1
    lr_scale_text_bert: 0.1
    lr_scale_mmt: 1.0  # no scaling
    text_bert_init_from_bert_base: true
    text_bert:
      num_hidden_layers: 3
    obj:
      mmt_in_dim: 2048
      dropout_prob: 0.1
    ocr:
      mmt_in_dim: 3002  # 300 (FastText) + 604 (PHOC) + 2048 (Faster R-CNN) + 50 (all zeros; legacy)
      dropout_prob: 0.1
    mmt:
      hidden_size: 768
      num_hidden_layers: 4
    classifier:
      type: linear
      ocr_max_num: 50
      ocr_ptr_net:
        hidden_size: 768
        query_key_size: 768
      params: {}
    model_data_dir: ${env.data_dir}
    losses:
    - type: m4c_decoding_bce_with_mask

--------------------------------------------------------------------------------
/mmf/configs/models/m4c_captioner/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  m4c_captioner:
    lr_scale_frcn: 0.1
    lr_scale_text_bert: 0.1
    lr_scale_mmt: 1.0  # no scaling
    text_bert_init_from_bert_base: true
    text_bert:
      num_hidden_layers: 3
    obj:
      mmt_in_dim: 2048
      dropout_prob: 0.1
    ocr:
      mmt_in_dim: 3002  # 300 (FastText) + 604 (PHOC) + 2048 (Faster R-CNN) + 50 (all zeros; legacy)
      dropout_prob: 0.1
    mmt:
      hidden_size: 768
      num_hidden_layers: 4
    classifier:
      type: linear
      ocr_max_num: 50
      ocr_ptr_net:
        hidden_size: 768
        query_key_size: 768
      params: {}
    model_data_dir: ${env.data_dir}
    losses:
    - type: m4c_decoding_bce_with_mask
    remove_unk_in_pred: true

--------------------------------------------------------------------------------
/mmf/configs/models/mmbt/classification.yaml:
--------------------------------------------------------------------------------
model_config:
  mmbt:
    training_head_type: classification
    num_labels: 2
    losses:
    - type: cross_entropy

--------------------------------------------------------------------------------
/mmf/configs/models/mmbt/pretrain.yaml:
--------------------------------------------------------------------------------
includes:
- ./defaults.yaml

--------------------------------------------------------------------------------
/mmf/configs/models/mmbt/with_features.yaml:
--------------------------------------------------------------------------------
model_config:
  mmbt:
    model_data_dir: ${env.data_dir}
    direct_features_input: true
    modal_encoder:
      type: finetune_faster_rcnn_fpn_fc7
      params:
        in_dim: 2048
        bias_file: models/detectron.defaults/fc7_b.pkl
        weights_file: models/detectron.defaults/fc7_w.pkl
        model_data_dir: ${model_config.mmbt.model_data_dir}

--------------------------------------------------------------------------------
/mmf/configs/models/mmf_transformer/pretrain.yaml:
--------------------------------------------------------------------------------
includes:
- configs/models/mmf_transformer/defaults.yaml

model_config:
  mmf_transformer:
    heads:
    - type: mlm
      freeze: false
      lr_multiplier: 1.0
      # default for bert base
      hidden_size: 768
      # default vocab size for bert base
      vocab_size: 30522
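
For orientation, the mlm head configured above maps transformer hidden states to vocabulary logits (hidden_size and vocab_size are the bert-base defaults, as the comments note). A bare-bones stand-in with the same dimensions, purely illustrative rather than MMF's actual head implementation:

import torch.nn as nn

class ToyMLMHead(nn.Module):
    """Minimal masked-language-modeling head: hidden states -> vocab logits."""

    def __init__(self, hidden_size=768, vocab_size=30522):
        super().__init__()
        self.transform = nn.Linear(hidden_size, hidden_size)
        self.act = nn.GELU()
        self.decoder = nn.Linear(hidden_size, vocab_size)

    def forward(self, hidden_states):
        return self.decoder(self.act(self.transform(hidden_states)))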
--------------------------------------------------------------------------------
/mmf/configs/models/movie_mcan/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  movie_mcan:
    model_data_dir: ${env.data_dir}
    classifier:
      type: triple_linear
      params: {}
    image_feature_embeddings:
      type: two_branches
      params:
        hidden_dim: 1024
        cond_dim: 1024
        num_attn: 8
        dropout: 0.1
        num_layers: 6
        cbn_num_layers: 4
    image_feature_dim: 2048
    image_feature_encodings:
      type: default
      params:
        model_data_dir: ${model_config.movie_mcan.model_data_dir}
        cond_features: 1024
        in_dim: ${model_config.movie_mcan.image_feature_dim}
    text_embeddings:
      type: mcan
      params:
        hidden_dim: 1024
        embedding_dim: 300
        num_attn: 8
        dropout: 0.1
        num_layers: 6
        num_attn_pool: 1
        num_feat: 2
        model_data_dir: ${model_config.movie_mcan.model_data_dir}

--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/bert.yaml:
--------------------------------------------------------------------------------
model_config:
  unimodal_text:
    bert_model_name: bert-base-uncased
    text_hidden_size: 768
    num_labels: 2
    text_encoder:
      type: transformer
      params:
        bert_model_name: ${model_config.unimodal_text.bert_model_name}
        hidden_size: 768
        num_hidden_layers: 12
        num_attention_heads: 12
        output_attentions: false
        output_hidden_states: false

    classifier:
      params:
        in_dim: 768

--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/image.yaml:
--------------------------------------------------------------------------------
model_config:
  unimodal_image:
    # Either pretraining or classification
    direct_features_input: false
    freeze_base: false
    finetune_lr_multiplier: 1
    # Dimension of the embedding finally returned by the modal encoder
    modal_hidden_size: 2048
    # Used when classification head is activated
    num_labels: 2
    modal_encoder:
      type: resnet152
      params:
        pretrained: true
        pool_type: avg
        num_output_features: 1

    classifier:
      type: mlp
      params:
        in_dim: 2048
        out_dim: 2
        hidden_dim: 768
        num_layers: 0

--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/text.yaml:
--------------------------------------------------------------------------------
model_config:
  unimodal_text:
    # Either pretraining or classification
    bert_model_name: bert-base-uncased
    freeze_base: false
    finetune_lr_multiplier: 1
    # Dimension of the embedding finally returned by the text encoder
    text_hidden_size: 300
    # Used when classification head is activated
    num_labels: 2
    text_encoder:
      type: embedding
      params:
        operator: sum
        embedding_params:
          type: vocab
          params:
            type: intersected
            embedding_name: glove.6B.300d
            embedding_dim: 300
            data_dir: ${env.data_dir}
            vocab_file: vocabs/vocabulary_100k.txt

    classifier:
      type: mlp
      params:
        in_dim: 300
        out_dim: 2
        hidden_dim: 768
        num_layers: 0
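
One detail worth flagging in the two unimodal classifier blocks above: with num_layers: 0 an MLP has no hidden layers, so the head effectively reduces to a single in_dim -> out_dim projection and hidden_dim: 768 plays no role. Roughly, for the text config (illustrative only; MMF's real mlp classifier layer may wrap this in dropout or normalization):

import torch.nn as nn

# num_layers: 0  ->  a single projection, in_dim=300 to out_dim=2
classifier = nn.Linear(300, 2)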
unimodal_image: 3 | model_data_dir: ${env.data_dir} 4 | direct_features_input: true 5 | modal_encoder: 6 | type: finetune_faster_rcnn_fpn_fc7 7 | params: 8 | in_dim: 2048 9 | bias_file: models/detectron.defaults/fc7_b.pkl 10 | weights_file: models/detectron.defaults/fc7_w.pkl 11 | model_data_dir: ${model_config.unimodal_image.model_data_dir} 12 | num_output_features: 1 13 | -------------------------------------------------------------------------------- /mmf/configs/models/uniter/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | uniter: 3 | heads: 4 | vqa2: 5 | type: mlp 6 | freeze: false 7 | lr_multiplier: 1.0 8 | in_dim: 768 9 | hidden_size: 1536 10 | num_labels: 3129 11 | pooler_name: bert_pooler 12 | text_embeddings: 13 | type: bert_embeddings 14 | image_embeddings: 15 | type: uniter_image_embeddings 16 | params: 17 | name: 'uniter_image_embeddings' 18 | encoder: 19 | type: transformer 20 | params: 21 | bert_model_name: bert-base-uncased 22 | hidden_size: 768 23 | num_hidden_layers: 12 24 | num_attention_heads: 12 25 | output_attentions: false 26 | output_hidden_states: false 27 | tasks: 28 | - vqa2 29 | -------------------------------------------------------------------------------- /mmf/configs/models/vilbert/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/vilbert/defaults.yaml 3 | -------------------------------------------------------------------------------- /mmf/configs/models/vinvl/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | vinvl: 3 | heads: 4 | test: 5 | type: mlp 6 | freeze: false 7 | lr_multiplier: 1.0 8 | in_dim: 768 9 | hidden_size: 1536 10 | num_labels: 3129 11 | pooler_name: bert_pooler 12 | bert_model_name: bert-base-uncased 13 | loss_type: sfmx 14 | img_feature_dim: 2054 15 | img_feature_type: 'frcnn' 16 | use_img_layernorm: 1 17 | img_layer_norm_eps: 1e-12 18 | max_img_seq_len: 70 19 | -------------------------------------------------------------------------------- /mmf/configs/models/visual_bert/classification.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | visual_bert: 3 | training_head_type: classification 4 | -------------------------------------------------------------------------------- /mmf/configs/models/visual_bert/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | visual_bert: 3 | bert_model_name: bert-base-uncased 4 | training_head_type: pretraining 5 | visual_embedding_dim: 2048 6 | special_visual_initialize: true 7 | embedding_strategy: plain 8 | bypass_transformer: false 9 | output_attentions: false 10 | output_hidden_states: false 11 | random_initialize: false 12 | freeze_base: false 13 | finetune_lr_multiplier: 1 14 | # Default points to BERT pooler strategy which is to take 15 | # representation of CLS token after passing it through a dense layer 16 | pooler_strategy: default 17 | zerobias: false # Initialize last layer to predict closer to 0 on init for sigmoid outputs 18 | -------------------------------------------------------------------------------- /mmf/configs/models/visual_bert/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/visual_bert/defaults.yaml 3 | 
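4 | # No overrides are needed here: defaults.yaml (included above) already sets 5 | # training_head_type: pretraining, so the include alone configures pretraining.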
-------------------------------------------------------------------------------- /mmf/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import processors 3 | from .base_dataset import BaseDataset 4 | from .base_dataset_builder import BaseDatasetBuilder 5 | from .concat_dataset import ConcatDataset 6 | from .lightning_multi_datamodule import LightningMultiDataModule 7 | from .lightning_multi_dataset_loader import LightningMultiDataLoader 8 | from .mmf_dataset import MMFDataset 9 | from .mmf_dataset_builder import MMFDatasetBuilder 10 | from .multi_dataset_loader import MultiDatasetLoader 11 | 12 | 13 | __all__ = [ 14 | "processors", 15 | "BaseDataset", 16 | "BaseDatasetBuilder", 17 | "ConcatDataset", 18 | "MultiDatasetLoader", 19 | "MMFDataset", 20 | "MMFDatasetBuilder", 21 | "LightningMultiDataModule", 22 | "LightningMultiDataLoader", 23 | ] 24 | -------------------------------------------------------------------------------- /mmf/datasets/builders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/airstore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.utils.env import import_files 4 | 5 | 6 | import_files(__file__, "mmf.datasets.builders.airstore") 7 | -------------------------------------------------------------------------------- /mmf/datasets/builders/airstore/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.airstore.dataset import AirstoreDataset 5 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 6 | 7 | 8 | @registry.register_builder("airstore") 9 | class AirstoreDatasetBuilder(MMFDatasetBuilder): 10 | def __init__( 11 | self, dataset_name="airstore", dataset_class=AirstoreDataset, *args, **kwargs 12 | ): 13 | # Forward the configured dataset class instead of silently discarding it. 14 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 15 | 16 | @classmethod 17 | def config_path(cls): 18 | return "configs/datasets/airstore/defaults.yaml" 19 | -------------------------------------------------------------------------------- /mmf/datasets/builders/charades/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/charades/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/charades/builder.py: -------------------------------------------------------------------------------- 1 | from mmf.common.registry import registry 2 | from mmf.datasets.builders.charades.dataset import CharadesDataset 3 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 4 | 5 | 6 | @registry.register_builder("charades") 7 | class CharadesBuilder(MMFDatasetBuilder): 8 | def __init__( 9 | self, dataset_name="charades", dataset_class=CharadesDataset, *args, **kwargs 10 | ): 11 | # Forward the configured dataset class instead of silently discarding it. 12 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/charades/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/clevr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/clevr/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/coco/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = [ 3 | "COCOBuilder", 4 | "COCODataset", 5 | "DetectionCOCOBuilder", 6 | "DetectionCOCODataset", 7 | "MaskedCOCOBuilder", 8 | "MaskedCOCODataset", 9 | ] 10 | 11 | from .builder import COCOBuilder 12 | from .dataset import COCODataset 13 | from .detection_builder import DetectionCOCOBuilder 14 | from .detection_dataset import DetectionCOCODataset 15 | from .masked_builder import MaskedCOCOBuilder 16 | from .masked_dataset import MaskedCOCODataset 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/coco/detection_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.coco.detection_dataset import DetectionCOCODataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("detection_coco") 8 | class DetectionCOCOBuilder(MMFDatasetBuilder): 9 | def __init__(self): 10 | super().__init__( 11 | dataset_name="detection_coco", dataset_class=DetectionCOCODataset 12 | ) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/coco/detection.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/coco/masked_builder.py: -------------------------------------------------------------------------------- 1 | from mmf.common.registry import registry 2 | from mmf.datasets.builders.coco.builder import COCOBuilder 3 | 4 | from .masked_dataset import MaskedCOCODataset 5 | 6 | 7 | @registry.register_builder("masked_coco") 8 | class MaskedCOCOBuilder(COCOBuilder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "masked_coco" 12 | self.set_dataset_class(MaskedCOCODataset) 13 | 14 | def update_registry_for_model(self, config): 15 | registry.register( 16 | self.dataset_name + "_text_vocab_size", 17 | self.dataset.masked_token_processor.get_vocab_size(), 18 | ) 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/coco/masked.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/coco2017/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/coco2017/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/coco2017/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.coco2017.masked_dataset import MaskedCoco2017Dataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("masked_coco2017") 8 | class MaskedCoco2017Builder(MMFDatasetBuilder): 9 | def __init__( 10 | self, 11 | dataset_name="masked_coco2017", 12 | dataset_class=MaskedCoco2017Dataset, 13 | *args, 14 | **kwargs, 15 | ): 16 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 17 | 18 | @classmethod 19 | def config_path(cls): 20 | return "configs/datasets/coco2017/masked.yaml" 21 | -------------------------------------------------------------------------------- /mmf/datasets/builders/coco2017/masked_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
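# Masked pretraining on COCO 2017 reuses the Localized Narratives masking logic through the mixin imported below; this class only binds the dataset name.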
2 | 3 | from mmf.common.typings import MMFDatasetConfigType 4 | from mmf.datasets.builders.localized_narratives.masked_dataset import ( 5 | MaskedLocalizedNarrativesDatasetMixin, 6 | ) 7 | from mmf.datasets.mmf_dataset import MMFDataset 8 | 9 | 10 | class MaskedCoco2017Dataset(MaskedLocalizedNarrativesDatasetMixin, MMFDataset): 11 | def __init__( 12 | self, 13 | config: MMFDatasetConfigType, 14 | dataset_type: str, 15 | index: int, 16 | *args, 17 | **kwargs, 18 | ): 19 | super().__init__( 20 | "masked_coco2017", config, dataset_type, index, *args, **kwargs 21 | ) 22 | -------------------------------------------------------------------------------- /mmf/datasets/builders/conceptual_captions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = [ 3 | "ConceptualCaptionsBuilder", 4 | "ConceptualCaptionsDataset", 5 | "MaskedConceptualCaptionsBuilder", 6 | "MaskedConceptualCaptionsDataset", 7 | ] 8 | 9 | from .builder import ConceptualCaptionsBuilder 10 | from .dataset import ConceptualCaptionsDataset 11 | from .masked_builder import MaskedConceptualCaptionsBuilder 12 | from .masked_dataset import MaskedConceptualCaptionsDataset 13 | -------------------------------------------------------------------------------- /mmf/datasets/builders/conceptual_captions/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.coco import COCOBuilder 5 | 6 | from .dataset import ConceptualCaptionsDataset 7 | 8 | 9 | @registry.register_builder("conceptual_captions") 10 | class ConceptualCaptionsBuilder(COCOBuilder): 11 | def __init__(self): 12 | super().__init__() 13 | self.dataset_name = "conceptual_captions" 14 | self.set_dataset_class(ConceptualCaptionsDataset) 15 | 16 | @classmethod 17 | def config_path(cls): 18 | return "configs/datasets/conceptual_captions/defaults.yaml" 19 | -------------------------------------------------------------------------------- /mmf/datasets/builders/conceptual_captions/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.coco import MaskedCOCOBuilder 4 | 5 | from .masked_dataset import MaskedConceptualCaptionsDataset 6 | 7 | 8 | @registry.register_builder("masked_conceptual_captions") 9 | class MaskedConceptualCaptionsBuilder(MaskedCOCOBuilder): 10 | def __init__(self): 11 | super().__init__() 12 | self.dataset_name = "masked_conceptual_captions" 13 | self.set_dataset_class(MaskedConceptualCaptionsDataset) 14 | 15 | @classmethod 16 | def config_path(cls): 17 | return "configs/datasets/conceptual_captions/masked.yaml" 18 | -------------------------------------------------------------------------------- /mmf/datasets/builders/conceptual_captions/masked_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | from mmf.datasets.builders.coco import MaskedCOCODataset 4 | 5 | 6 | class MaskedConceptualCaptionsDataset(MaskedCOCODataset): 7 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 8 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 9 | self.dataset_name = "masked_conceptual_captions" 10 | self._two_sentence = config.get("two_sentence", True) 11 | self._false_caption = config.get("false_caption", True) 12 | self._two_sentence_probability = config.get("two_sentence_probability", 0.5) 13 | self._false_caption_probability = config.get("false_caption_probability", 0.5) 14 | -------------------------------------------------------------------------------- /mmf/datasets/builders/flickr30k/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/flickr30k/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/flickr30k/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.flickr30k.masked_dataset import MaskedFlickr30kDataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("masked_flickr30k") 8 | class MaskedFlickr30kBuilder(MMFDatasetBuilder): 9 | def __init__( 10 | self, 11 | dataset_name="masked_flickr30k", 12 | dataset_class=MaskedFlickr30kDataset, 13 | *args, 14 | **kwargs, 15 | ): 16 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 17 | 18 | @classmethod 19 | def config_path(cls): 20 | return "configs/datasets/flickr30k/masked.yaml" 21 | -------------------------------------------------------------------------------- /mmf/datasets/builders/flickr30k/masked_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.typings import MMFDatasetConfigType 4 | from mmf.datasets.builders.localized_narratives.masked_dataset import ( 5 | MaskedLocalizedNarrativesDatasetMixin, 6 | ) 7 | from mmf.datasets.mmf_dataset import MMFDataset 8 | 9 | 10 | class MaskedFlickr30kDataset(MaskedLocalizedNarrativesDatasetMixin, MMFDataset): 11 | def __init__( 12 | self, 13 | config: MMFDatasetConfigType, 14 | dataset_type: str, 15 | index: int, 16 | *args, 17 | **kwargs, 18 | ): 19 | super().__init__( 20 | "masked_flickr30k", config, dataset_type, index, *args, **kwargs 21 | ) 22 | -------------------------------------------------------------------------------- /mmf/datasets/builders/glue/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/gqa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | __all__ = ["GQABuilder", "GQADataset", "MaskedGQABuilder", "MaskedGQADataset"] 4 | 5 | from .builder import GQABuilder 6 | from .dataset import GQADataset 7 | from .masked_builder import MaskedGQABuilder 8 | from .masked_dataset import MaskedGQADataset 9 | -------------------------------------------------------------------------------- /mmf/datasets/builders/gqa/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.gqa.builder import GQABuilder 5 | from mmf.datasets.builders.gqa.masked_dataset import MaskedGQADataset 6 | 7 | 8 | @registry.register_builder("masked_gqa") 9 | class MaskedGQABuilder(GQABuilder): 10 | def __init__(self): 11 | super().__init__() 12 | self.dataset_name = "masked_gqa" 13 | self.dataset_class = MaskedGQADataset 14 | 15 | @classmethod 16 | def config_path(cls): 17 | return "configs/datasets/gqa/masked.yaml" 18 | -------------------------------------------------------------------------------- /mmf/datasets/builders/hateful_memes/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/localized_narratives/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/localized_narratives/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/localized_narratives/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.localized_narratives.masked_dataset import ( 4 | MaskedLocalizedNarrativesDataset, 5 | ) 6 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 7 | 8 | 9 | @registry.register_builder("masked_localized_narratives") 10 | class MaskedLocalizedNarrativesBuilder(MMFDatasetBuilder): 11 | def __init__( 12 | self, 13 | dataset_name="masked_localized_narratives", 14 | dataset_class=MaskedLocalizedNarrativesDataset, 15 | *args, 16 | **kwargs, 17 | ): 18 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/localized_narratives/masked.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/mmimdb/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/mmimdb/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | from mmf.common.registry import registry 9 | from mmf.datasets.builders.mmimdb.masked_dataset import MaskedMMImdbDataset 10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 11 | 12 | 13 | @registry.register_builder("masked_mmimdb") 14 | class MaskedMMImdbBuilder(VQA2Builder): 15 | def __init__(self): 16 | super().__init__() 17 | self.dataset_name = "masked_mmimdb" 18 | self.dataset_class = MaskedMMImdbDataset 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/mmimdb/masked.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/nlvr2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/nlvr2/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | from mmf.common.registry import registry 9 | from mmf.datasets.builders.nlvr2.dataset import NLVR2Dataset 10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 11 | 12 | 13 | @registry.register_builder("nlvr2") 14 | class NLVR2Builder(VQA2Builder): 15 | def __init__(self): 16 | super().__init__() 17 | self.dataset_name = "nlvr2" 18 | self.dataset_class = NLVR2Dataset 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/nlvr2/defaults.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/ocrvqa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/ocrvqa/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import Registry 3 | from mmf.datasets.builders.ocrvqa.dataset import OCRVQADataset 4 | from mmf.datasets.builders.textvqa.builder import TextVQABuilder 5 | 6 | 7 | @Registry.register_builder("ocrvqa") 8 | class OCRVQABuilder(TextVQABuilder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "ocrvqa" 12 | self.set_dataset_class(OCRVQADataset) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/ocrvqa/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/ocrvqa/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from mmf.datasets.builders.textvqa.dataset import TextVQADataset 3 | 4 | 5 | class OCRVQADataset(TextVQADataset): 6 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 7 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 8 | self.dataset_name = "ocrvqa" 9 | 10 | def preprocess_sample_info(self, sample_info): 11 | # Do nothing in this case 12 | return sample_info 13 | -------------------------------------------------------------------------------- /mmf/datasets/builders/okvqa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/okvqa/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/okvqa/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.okvqa.dataset import OKVQADataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("okvqa") 8 | class OKVQABuilder(MMFDatasetBuilder): 9 | def __init__( 10 | self, dataset_name="okvqa", dataset_class=OKVQADataset, *args, **kwargs 11 | ): 12 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/okvqa/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/retrieval/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | __all__ = ["RetrievalDataset", "RetrievalBuilder"] 4 | 5 | from .builder import RetrievalBuilder 6 | from .dataset import RetrievalDataset 7 | -------------------------------------------------------------------------------- /mmf/datasets/builders/retrieval/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | 9 | from mmf.common.registry import registry 10 | from mmf.datasets.builders.retrieval.dataset import RetrievalDataset 11 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 12 | 13 | 14 | @registry.register_builder("retrieval") 15 | class RetrievalBuilder(MMFDatasetBuilder): 16 | def __init__( 17 | self, dataset_name="retrieval", dataset_class=RetrievalDataset, *args, **kwargs 18 | ): 19 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 20 | 21 | @classmethod 22 | def config_path(cls): 23 | return "configs/datasets/retrieval/flickr30k_defaults.yaml" 24 | -------------------------------------------------------------------------------- /mmf/datasets/builders/sbu_captions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 3 | __all__ = ["MaskedSBUBuilder", "MaskedSBUDataset"] 4 | 5 | from .masked_builder import MaskedSBUBuilder 6 | from .masked_dataset import MaskedSBUDataset 7 | -------------------------------------------------------------------------------- /mmf/datasets/builders/sbu_captions/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.coco import MaskedCOCOBuilder 5 | 6 | from .masked_dataset import MaskedSBUDataset 7 | 8 | 9 | @registry.register_builder("masked_sbu") 10 | class MaskedSBUBuilder(MaskedCOCOBuilder): 11 | def __init__(self): 12 | super().__init__() 13 | self.dataset_name = "masked_sbu" 14 | self.set_dataset_class(MaskedSBUDataset) 15 | 16 | @classmethod 17 | def config_path(cls): 18 | return "configs/datasets/sbu_captions/masked.yaml" 19 | -------------------------------------------------------------------------------- /mmf/datasets/builders/sbu_captions/masked_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.datasets.builders.coco import MaskedCOCODataset 4 | 5 | 6 | class MaskedSBUDataset(MaskedCOCODataset): 7 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 8 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 9 | self.dataset_name = "masked_sbu" 10 | self._two_sentence = config.get("two_sentence", True) 11 | self._false_caption = config.get("false_caption", True) 12 | self._two_sentence_probability = config.get("two_sentence_probability", 0.5) 13 | self._false_caption_probability = config.get("false_caption_probability", 0.5) 14 | -------------------------------------------------------------------------------- /mmf/datasets/builders/stvqa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/stvqa/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import Registry 3 | from mmf.datasets.builders.stvqa.dataset import STVQADataset 4 | from mmf.datasets.builders.textvqa.builder import TextVQABuilder 5 | 6 | 7 | @Registry.register_builder("stvqa") 8 | class STVQABuilder(TextVQABuilder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "stvqa" 12 | self.set_dataset_class(STVQADataset) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/stvqa/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/stvqa/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
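# ST-VQA reuses the TextVQA dataset wholesale; beyond the dataset name, the only change is that feature paths are prefixed with the split directory ("train" or "test_task3") in preprocess_sample_info below.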
2 | from mmf.datasets.builders.textvqa.dataset import TextVQADataset 3 | 4 | 5 | class STVQADataset(TextVQADataset): 6 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 7 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 8 | self.dataset_name = "stvqa" 9 | 10 | def preprocess_sample_info(self, sample_info): 11 | feature_path = sample_info["feature_path"] 12 | append = "train" 13 | 14 | if self.dataset_type == "test": 15 | append = "test_task3" 16 | 17 | if not feature_path.startswith(append): 18 | feature_path = append + "/" + feature_path 19 | 20 | sample_info["feature_path"] = feature_path 21 | return sample_info 22 | -------------------------------------------------------------------------------- /mmf/datasets/builders/textcaps/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/textvqa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vinvl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = ["VinVLBuilder", "VinVLDataset"] 3 | 4 | from .builder import VinVLBuilder 5 | from .dataset import VinVLDataset 6 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_dialog/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_dialog/dataset.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | 4 | import torch 5 | from mmf.common.sample import Sample 6 | from mmf.datasets.builders.visual_dialog.database import VisualDialogDatabase 7 | from mmf.datasets.builders.vqa2 import VQA2Dataset 8 | 9 | 10 | class VisualDialogDataset(VQA2Dataset): 11 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 12 | super().__init__( 13 | config, 14 | dataset_type, 15 | imdb_file_index, 16 | dataset_name="visual_dialog", 17 | *args, 18 | **kwargs, 19 | ) 20 | 21 | discriminative = config.discriminative 22 | self._discriminative = discriminative.enabled 23 | self._return_indices = discriminative.return_indices 24 | self._no_unk = config.no_unk 25 | self._return_history = config.return_history 26 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_entailment/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_entailment/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | from mmf.common.registry import registry 9 | from mmf.datasets.builders.visual_entailment.dataset import VisualEntailmentDataset 10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 11 | 12 | 13 | @registry.register_builder("visual_entailment") 14 | class VisualEntailmentBuilder(VQA2Builder): 15 | def __init__(self): 16 | super().__init__() 17 | self.dataset_name = "visual_entailment" 18 | self.dataset_class = VisualEntailmentDataset 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/visual_entailment/defaults.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_genome/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_genome/detection_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.visual_genome.detection_dataset import ( 4 | DetectionVisualGenomeDataset, 5 | ) 6 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 7 | 8 | 9 | @registry.register_builder("detection_visual_genome") 10 | class DetectionVisualGenomeBuilder(MMFDatasetBuilder): 11 | def __init__(self): 12 | super().__init__( 13 | dataset_name="detection_visual_genome", 14 | dataset_class=DetectionVisualGenomeDataset, 15 | ) 16 | 17 | @classmethod 18 | def config_path(cls): 19 | return "configs/datasets/visual_genome/detection.yaml" 20 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_genome/detection_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.datasets.builders.coco.detection_dataset import DetectionCOCODataset 3 | 4 | 5 | class DetectionVisualGenomeDataset(DetectionCOCODataset): 6 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 7 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 8 | if "name" in kwargs: 9 | name = kwargs["name"] 10 | elif "dataset_name" in kwargs: 11 | name = kwargs["dataset_name"] 12 | else: 13 | name = "detection_visual_genome" 14 | self.dataset_name = name 15 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_genome/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.visual_genome.builder import VisualGenomeBuilder 5 | from mmf.datasets.builders.visual_genome.masked_dataset import MaskedVisualGenomeDataset 6 | 7 | 8 | @registry.register_builder("masked_visual_genome") 9 | class MaskedVisualGenomeBuilder(VisualGenomeBuilder): 10 | def __init__(self): 11 | super().__init__() 12 | self.dataset_name = "masked_visual_genome" 13 | self.dataset_class = MaskedVisualGenomeDataset 14 | 15 | @classmethod 16 | def config_path(cls): 17 | return "configs/datasets/visual_genome/masked.yaml" 18 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vizwiz/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .builder import VizWizBuilder 3 | from .dataset import VizWizDataset 4 | 5 | 6 | __all__ = ["VizWizBuilder", "VizWizDataset"] 7 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vizwiz/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.vizwiz.dataset import VizWizDataset 4 | from mmf.datasets.builders.vqa2 import VQA2Builder 5 | 6 | 7 | @registry.register_builder("vizwiz") 8 | class VizWizBuilder(VQA2Builder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "vizwiz" 12 | self.set_dataset_class(VizWizDataset) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/vizwiz/defaults.yaml" 17 | 18 | def update_registry_for_model(self, config): 19 | super().update_registry_for_model(config) 20 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqa2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = ["VQA2Builder", "VQA2Dataset"] 3 | 4 | from .builder import VQA2Builder 5 | from .dataset import VQA2Dataset 6 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqa2/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | from mmf.common.registry import registry 9 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 10 | from mmf.datasets.builders.vqa2.masked_dataset import MaskedVQA2Dataset 11 | 12 | 13 | @registry.register_builder("masked_vqa2") 14 | class MaskedVQA2Builder(VQA2Builder): 15 | def __init__(self): 16 | super().__init__() 17 | self.dataset_name = "masked_vqa2" 18 | self.dataset_class = MaskedVQA2Dataset 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/vqa2/masked.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqa2/masked_q_vqa2_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | 9 | from mmf.common.registry import registry 10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 11 | from mmf.datasets.builders.vqa2.masked_q_vqa2_dataset import MaskedQVQA2Dataset 12 | 13 | 14 | @registry.register_builder("masked_q_vqa2") 15 | class MaskedQVQA2Builder(VQA2Builder): 16 | def __init__(self): 17 | super().__init__() 18 | self.dataset_name = "masked_q_vqa2" 19 | self.dataset_class = MaskedQVQA2Dataset 20 | 21 | @classmethod 22 | def config_path(cls): 23 | return "configs/datasets/vqa2/masked_q.yaml" 24 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqa2/ocr_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import Registry 3 | from mmf.datasets.builders.vizwiz import VizWizBuilder 4 | from mmf.datasets.builders.vqa2.ocr_dataset import VQA2OCRDataset 5 | 6 | 7 | @Registry.register_builder("vqa2_ocr") 8 | class VQA2OCRBuilder(VizWizBuilder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "VQA2_OCR" 12 | self.set_dataset_class(VQA2OCRDataset) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return None 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqacp_v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/vqacp_v2/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/vqacp_v2/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.vqacp_v2.dataset import VQACPv2Dataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("vqacp_v2") 8 | class VQACPv2Builder(MMFDatasetBuilder): 9 | def __init__( 10 | self, dataset_name="vqacp_v2", dataset_class=VQACPv2Dataset, *args, **kwargs 11 | ): 12 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/vqacp_v2/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/databases/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import mmf.datasets.databases.readers # noqa 3 | 4 | from .annotation_database import AnnotationDatabase 5 | from .features_database import FeaturesDatabase 6 | from .image_database import ImageDatabase 7 | from .scene_graph_database import SceneGraphDatabase 8 | 9 | 10 | __all__ = [ 11 | "AnnotationDatabase", 12 | "FeaturesDatabase", 13 | "ImageDatabase", 14 | "SceneGraphDatabase", 15 | ] 16 | -------------------------------------------------------------------------------- /mmf/datasets/databases/readers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/databases/scene_graph_database.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.datasets.databases.annotation_database import AnnotationDatabase 3 | 4 | 5 | class SceneGraphDatabase(AnnotationDatabase): 6 | def __init__(self, config, scene_graph_path, *args, **kwargs): 7 | super().__init__(config, scene_graph_path, *args, **kwargs) 8 | self.data_dict = {} 9 | for item in self.data: 10 | self.data_dict[item["image_id"]] = item 11 | 12 | def __getitem__(self, idx): 13 | return self.data_dict[idx] 14 | -------------------------------------------------------------------------------- /mmf/datasets/subset_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from torch.utils.data.dataset import Subset 4 | 5 | 6 | class MMFSubset(Subset): 7 | def __init__(self, dataset, indices): 8 | super().__init__(dataset, indices) 9 | self._dir_representation = dir(self) 10 | 11 | def __getattr__(self, name): 12 | if "_dir_representation" in self.__dict__ and name in self._dir_representation: 13 | return getattr(self, name) 14 | elif "dataset" in self.__dict__ and hasattr(self.dataset, name): 15 | return getattr(self.dataset, name) 16 | else: 17 | raise AttributeError(name) 18 | -------------------------------------------------------------------------------- /mmf/models/albef/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import mmf.models.albef.vit # noqa 3 | -------------------------------------------------------------------------------- /mmf/models/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/models/m4c_captioner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
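# M4C-Captioner is M4C reused for captioning: it keeps the full M4C forward pass and, when remove_unk_in_pred is enabled in the config, pushes the <unk> logit to a large negative value so <unk> never appears in generated captions.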
2 | from mmf.common.registry import registry 3 | from mmf.models.m4c import M4C 4 | 5 | 6 | @registry.register_model("m4c_captioner") 7 | class M4CCaptioner(M4C): 8 | def __init__(self, config): 9 | super().__init__(config) 10 | self.remove_unk_in_pred = self.config.remove_unk_in_pred 11 | 12 | @classmethod 13 | def config_path(cls): 14 | return "configs/models/m4c_captioner/defaults.yaml" 15 | 16 | def _forward_output(self, sample_list, fwd_results): 17 | super()._forward_output(sample_list, fwd_results) 18 | 19 | if self.remove_unk_in_pred: 20 | # avoid outputting <unk> in the generated captions 21 | fwd_results["scores"][..., self.answer_processor.UNK_IDX] = -1e10 22 | 23 | return fwd_results 24 | -------------------------------------------------------------------------------- /mmf/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import mmf.models.transformers.backends # noqa 4 | from mmf.models.transformers.base import ( # noqa 5 | BaseTransformer, 6 | BaseTransformerBackend, 7 | BaseTransformerBackendConfig, 8 | BaseTransformerHead, 9 | BaseTransformerInput, 10 | BaseTransformerModalityConfig, 11 | ) 12 | -------------------------------------------------------------------------------- /mmf/models/transformers/backends/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.utils.env import import_files 4 | 5 | 6 | import_files(__file__, "mmf.models.transformers.backends") 7 | -------------------------------------------------------------------------------- /mmf/models/transformers/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.utils.env import import_files 4 | 5 | 6 | import_files(__file__, "mmf.models.transformers.heads") 7 | -------------------------------------------------------------------------------- /mmf/models/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = ["UniT"] 3 | 4 | from .unit import UniT 5 | -------------------------------------------------------------------------------- /mmf/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import mmf.modules.losses # noqa 3 | import mmf.modules.metrics # noqa 4 | import mmf.modules.optimizers # noqa 5 | import mmf.modules.schedulers # noqa 6 | -------------------------------------------------------------------------------- /mmf/projects: -------------------------------------------------------------------------------- 1 | ../projects -------------------------------------------------------------------------------- /mmf/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = ["BaseTrainer"] 3 | 4 | from .base_trainer import BaseTrainer 5 | -------------------------------------------------------------------------------- /mmf/trainers/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | -------------------------------------------------------------------------------- /mmf/trainers/callbacks/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.trainers.callbacks.base import Callback 4 | from mmf.utils.build import build_scheduler 5 | 6 | 7 | class LRSchedulerCallback(Callback): 8 | """Callback which executes an LR scheduler. It is executed after every 9 | batch iteration. 10 | """ 11 | 12 | def __init__(self, config, trainer): 13 | """ 14 | Args: 15 | config(mmf_typings.DictConfig): Config for the callback 16 | trainer(Type[BaseTrainer]): Trainer object 17 | """ 18 | super().__init__(config, trainer) 19 | 20 | self._scheduler = None 21 | if self.training_config.lr_scheduler is True: 22 | self._scheduler = build_scheduler(trainer.optimizer, self.config) 23 | 24 | def on_update_end(self, **kwargs): 25 | if self._scheduler is not None: 26 | self._scheduler.step() 27 | -------------------------------------------------------------------------------- /mmf/trainers/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/trainers/core/profiling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import logging 4 | import threading 5 | from abc import ABC 6 | from typing import Type 7 | 8 | from mmf.utils.timer import Timer 9 | 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class TrainerProfilingMixin(ABC): 15 | profiler: Type[Timer] = Timer() 16 | 17 | def profile(self, text: str) -> None: 18 | if self.training_config.logger_level != "debug": 19 | return 20 | logger.debug( 21 | f"tid={threading.current_thread().ident}, {text}: {self.profiler.get_time_since_start()}" 22 | ) 23 | self.profiler.reset() 24 | -------------------------------------------------------------------------------- /mmf/trainers/lightning_core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/utils/features/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/utils/file_io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
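# Exposes a single module-level iopath PathManager instance for all of MMF's file IO; Facebook-internal handlers are registered onto it only when the fb-internal module is importable.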
2 | 3 | from iopath.common.file_io import PathManager as pm 4 | 5 | 6 | PathManager = pm() 7 | 8 | try: 9 | # [FB only] register internal file IO handlers 10 | from mmf.utils.fb.file_io_handlers import register_handlers 11 | 12 | register_handlers(PathManager) 13 | except ImportError: 14 | pass 15 | -------------------------------------------------------------------------------- /mmf/utils/phoc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from .build_phoc import build_phoc # NoQA 4 | -------------------------------------------------------------------------------- /mmf/utils/phoc/build_phoc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .cphoc import build_phoc as _build_phoc_raw 4 | 5 | 6 | _alphabet = { 7 | "a", 8 | "b", 9 | "c", 10 | "d", 11 | "e", 12 | "f", 13 | "g", 14 | "h", 15 | "i", 16 | "j", 17 | "k", 18 | "l", 19 | "m", 20 | "n", 21 | "o", 22 | "p", 23 | "q", 24 | "r", 25 | "s", 26 | "t", 27 | "u", 28 | "v", 29 | "w", 30 | "x", 31 | "y", 32 | "z", 33 | "0", 34 | "1", 35 | "2", 36 | "3", 37 | "4", 38 | "5", 39 | "6", 40 | "7", 41 | "8", 42 | "9", 43 | } # NoQA 44 | 45 | 46 | def build_phoc(token): 47 | token = token.lower().strip() 48 | token = "".join([c for c in token if c in _alphabet]) 49 | phoc = _build_phoc_raw(token) 50 | phoc = np.array(phoc, dtype=np.float32) 51 | return phoc 52 | -------------------------------------------------------------------------------- /mmf/utils/torchscript.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from typing import Dict, Optional 4 | 5 | from torch import Tensor 6 | 7 | 8 | def getattr_torchscriptable( 9 | dictionary: Dict[str, Tensor], key: str, default: Optional[Tensor] = None 10 | ) -> Optional[Tensor]: 11 | if key in dictionary: 12 | return dictionary[key] 13 | else: 14 | return default 15 | -------------------------------------------------------------------------------- /mmf/utils/transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from torch import Tensor 4 | 5 | 6 | def transform_to_batch_sequence(tensor: Tensor) -> Tensor: 7 | if len(tensor.size()) == 2: 8 | return tensor 9 | else: 10 | assert len(tensor.size()) == 3 11 | return tensor.contiguous().view(-1, tensor.size(-1)) 12 | 13 | 14 | def transform_to_batch_sequence_dim(tensor: Tensor) -> Tensor: 15 | if len(tensor.size()) == 3: 16 | return tensor 17 | else: 18 | assert len(tensor.size()) == 4 19 | return tensor.contiguous().view(-1, tensor.size(-2), tensor.size(-1)) 20 | -------------------------------------------------------------------------------- /mmf/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import sys 4 | 5 | 6 | __version__ = "1.0.0rc12" 7 | 8 | msg = "MMF is only compatible with Python 3.6 and newer." 9 | 10 | 11 | if sys.version_info < (3, 6): 12 | raise ImportError(msg) 13 | -------------------------------------------------------------------------------- /mmf_cli/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | -------------------------------------------------------------------------------- /mmf_cli/predict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 -u 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | import sys 5 | 6 | from mmf_cli.run import run 7 | 8 | 9 | def predict(opts=None): 10 | if opts is None: 11 | sys.argv.extend(["evaluation.predict=true"]) 12 | else: 13 | opts.extend(["evaluation.predict=true"]) 14 | 15 | run(opts, predict=True)  # forward the collected overrides to run() 16 | 17 | 18 | if __name__ == "__main__": 19 | predict() 20 | -------------------------------------------------------------------------------- /mmf_cli/torchx_entryscript.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | """ 3 | Entrypoint script used by TorchX to start the training run in each process 4 | """ 5 | 6 | from mmf_cli.fb_run import fb_scheduler_run 7 | 8 | 9 | if __name__ == "__main__": 10 | fb_scheduler_run() 11 | -------------------------------------------------------------------------------- /projects/ban/README.md: -------------------------------------------------------------------------------- 1 | # BAN 2 | 3 | This repository contains the code for the BAN model. Please cite the following paper if you are using the BAN model from MMF: 4 | 5 | * Kim, J. H., Jun, J., & Zhang, B. T. (2018). *Bilinear attention networks*. In Advances in Neural Information Processing Systems (pp. 1564-1574). ([arXiv](https://arxiv.org/abs/1805.07932)) 6 | ``` 7 | @inproceedings{kim2018bilinear, 8 | title={Bilinear attention networks}, 9 | author={Kim, Jin-Hwa and Jun, Jaehyun and Zhang, Byoung-Tak}, 10 | booktitle={Advances in Neural Information Processing Systems}, 11 | pages={1564--1574}, 12 | year={2018} 13 | } 14 | ``` 15 | 16 | ## Installation 17 | 18 | Follow the installation instructions in the [documentation](https://mmf.readthedocs.io/en/latest/notes/installation.html).
19 | 20 | ## Training 21 | To train the BAN model on the VQA2 dataset, run the following command: 22 | ``` 23 | mmf_run config=projects/ban/configs/vqa2/defaults.yaml run_type=train_val dataset=vqa2 model=ban 24 | ``` 25 | -------------------------------------------------------------------------------- /projects/ban/configs/textvqa/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../../mmf/configs/datasets/textvqa/defaults.yaml 3 | 4 | evaluation: 5 | metrics: 6 | - vqa_accuracy 7 | 8 | training: 9 | early_stop: 10 | criteria: textvqa/vqa_accuracy 11 | minimize: false 12 | -------------------------------------------------------------------------------- /projects/ban/configs/vizwiz/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../../mmf/configs/datasets/vizwiz/defaults.yaml 3 | 4 | evaluation: 5 | metrics: 6 | - vqa_accuracy 7 | 8 | training: 9 | early_stop: 10 | criteria: vizwiz/vqa_accuracy 11 | minimize: false 12 | -------------------------------------------------------------------------------- /projects/ban/configs/vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../../mmf/configs/datasets/vqa2/defaults.yaml 3 | 4 | evaluation: 5 | metrics: 6 | - vqa_accuracy 7 | 8 | training: 9 | early_stop: 10 | criteria: vqa2/vqa_accuracy 11 | minimize: false 12 | -------------------------------------------------------------------------------- /projects/butd/README.md: -------------------------------------------------------------------------------- 1 | # BUTD 2 | 3 | This repository contains the code for a PyTorch implementation of the BUTD model, originally released in this [repo](https://github.com/peteanderson80/bottom-up-attention). Please cite the following paper if you use the BUTD model from MMF: 4 | 5 | * Anderson, P., He, X., Buehler, C., Teney, D., Johnson, M., Gould, S., & Zhang, L. (2018). *Bottom-up and top-down attention for image captioning and visual question answering*. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6077-6086). ([arXiv](https://arxiv.org/abs/1707.07998)) 6 | ``` 7 | @inproceedings{Anderson2017up-down, 8 | author = {Peter Anderson and Xiaodong He and Chris Buehler and Damien Teney and Mark Johnson and Stephen Gould and Lei Zhang}, 9 | title = {Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering}, 10 | booktitle={CVPR}, 11 | year = {2018} 12 | } 13 | ``` 14 | 15 | Please see [https://mmf.sh/docs/projects/butd](https://mmf.sh/docs/projects/butd) for more details on how to use the BUTD model.
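The configs that follow switch BUTD between beam search and nucleus sampling at inference time. As a minimal sketch of driving such a run programmatically (an assumption, not a documented API: it relies on `mmf_cli.run.run()` parsing `sys.argv` when no opts are passed, as `mmf_cli/predict.py` above suggests; the override values are illustrative):

```python
import sys

from mmf_cli.run import run

# Mimic the CLI: place MMF config overrides on sys.argv, then call run().
# beam_search.yaml (below) sets inference.type=beam_search with beam_length 5.
sys.argv = [
    "mmf_run",
    "config=projects/butd/configs/coco/beam_search.yaml",
    "run_type=val",
    "dataset=coco",
    "model=butd",
]
run()
```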
16 | -------------------------------------------------------------------------------- /projects/butd/configs/coco/beam_search.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | butd: 6 | inference: 7 | type: beam_search 8 | params: 9 | beam_length: 5 10 | 11 | training: 12 | batch_size: 1 13 | -------------------------------------------------------------------------------- /projects/butd/configs/coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: Adamax 3 | params: 4 | eps: 1.0e-08 5 | lr: 0.01 6 | weight_decay: 0 7 | 8 | evaluation: 9 | metrics: 10 | - caption_bleu4 11 | 12 | training: 13 | clip_norm_mode: all 14 | clip_gradients: true 15 | lr_ratio: 0.1 16 | lr_scheduler: true 17 | lr_steps: 18 | - 15000 19 | - 25000 20 | - 35000 21 | - 45000 22 | max_grad_l2_norm: 0.25 23 | max_updates: 50000 24 | use_warmup: true 25 | warmup_factor: 0.2 26 | warmup_iterations: 1000 27 | batch_size: 256 28 | num_workers: 7 29 | task_size_proportional_sampling: true 30 | early_stop: 31 | criteria: coco/caption_bleu4 32 | minimize: false 33 | -------------------------------------------------------------------------------- /projects/butd/configs/coco/nucleus_sampling.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | butd: 6 | inference: 7 | type: nucleus_sampling 8 | params: 9 | sum_threshold: 0.8 10 | 11 | training: 12 | batch_size: 1 13 | -------------------------------------------------------------------------------- /projects/butd/configs/conceptual_captions/beam_search.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | butd: 6 | inference: 7 | type: beam_search 8 | params: 9 | beam_length: 5 10 | 11 | training: 12 | batch_size: 1 13 | -------------------------------------------------------------------------------- /projects/butd/configs/conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: Adamax 3 | params: 4 | eps: 1.0e-08 5 | lr: 0.01 6 | weight_decay: 0 7 | 8 | evaluation: 9 | metrics: 10 | - caption_bleu4 11 | 12 | training: 13 | clip_norm_mode: all 14 | clip_gradients: true 15 | lr_ratio: 0.1 16 | lr_scheduler: true 17 | lr_steps: 18 | - 15000 19 | - 25000 20 | - 35000 21 | - 45000 22 | max_grad_l2_norm: 0.25 23 | max_updates: 50000 24 | use_warmup: true 25 | warmup_factor: 0.2 26 | warmup_iterations: 1000 27 | batch_size: 256 28 | num_workers: 7 29 | task_size_proportional_sampling: true 30 | early_stop: 31 | criteria: conceptual_captions/caption_bleu4 32 | minimize: false 33 | -------------------------------------------------------------------------------- /projects/butd/configs/conceptual_captions/nucleus_sampling.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | butd: 6 | inference: 7 | type: nucleus_sampling 8 | params: 9 | sum_threshold: 0.8 10 | 11 | training: 12 | batch_size: 1 13 | -------------------------------------------------------------------------------- /projects/butd/configs/textcaps/beam_search.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 |
dataset_config: 5 | textcaps: 6 | zoo_requirements: 7 | - textvqa.defaults 8 | - textcaps.defaults 9 | annotations: 10 | val: 11 | - textcaps/defaults/annotations/imdb_val_filtered_by_image_id.npy 12 | 13 | model_config: 14 | butd: &butd 15 | inference: 16 | type: beam_search 17 | params: 18 | beam_length: 5 19 | 20 | training: 21 | batch_size: 1 22 | -------------------------------------------------------------------------------- /projects/butd/configs/textcaps/eval_pretrained_coco_model.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./beam_search.yaml 3 | 4 | dataset_config: 5 | textcaps: 6 | processors: 7 | text_processor: 8 | params: 9 | vocab: 10 | vocab_file: textcaps/defaults/extras/vocabs/coco_vocabulary_captioning_thresh5.txt 11 | caption_processor: 12 | params: 13 | vocab: 14 | vocab_file: textcaps/defaults/extras/vocabs/coco_vocabulary_captioning_thresh5.txt 15 | -------------------------------------------------------------------------------- /projects/hateful_memes/configs/concat_bert/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/concat_bert/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/concat_bow/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/concat_bow/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/late_fusion/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/late_fusion/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/mmbt/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../mmbt/configs/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/mmbt/with_features.yaml: -------------------------------------------------------------------------------- 1 | ../../../mmbt/configs/hateful_memes/with_features.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/mmf_transformer/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../mmf_transformer/configs/hateful_memes/defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/hateful_memes/configs/unimodal/bert.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/unimodal/configs/hateful_memes/bert.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/unimodal/image.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/unimodal/configs/hateful_memes/image.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/unimodal/text.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/unimodal/configs/hateful_memes/text.yaml -------------------------------------------------------------------------------- 
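The hateful_memes project configs above and below are mostly one-line pointers into shared model configs. As a hedged example of selecting one of them from the command line, following the invocation format shown in the BAN README (the `run_type` value here is illustrative):

```
mmf_run config=projects/hateful_memes/configs/mmbt/defaults.yaml run_type=train_val dataset=hateful_memes model=mmbt
```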
/projects/hateful_memes/configs/unimodal/with_features.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/unimodal/configs/hateful_memes/with_features.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/vilbert/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../vilbert/configs/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/vilbert/direct.yaml: -------------------------------------------------------------------------------- 1 | ../../../vilbert/configs/hateful_memes/direct.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/vilbert/from_cc.yaml: -------------------------------------------------------------------------------- 1 | ../../../vilbert/configs/hateful_memes/from_cc.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/visual_bert/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../visual_bert/configs/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/visual_bert/direct.yaml: -------------------------------------------------------------------------------- 1 | ../../../visual_bert/configs/hateful_memes/direct.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/visual_bert/from_coco.yaml: -------------------------------------------------------------------------------- 1 | ../../../visual_bert/configs/hateful_memes/from_coco.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/attack_vectors.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./multilabel.yaml 3 | - configs/datasets/hateful_memes/fine_grained/attack_vectors.yaml 4 | 5 | model_config: 6 | visual_bert: 7 | num_labels: 9 8 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../configs/visual_bert/defaults.yaml 3 | - configs/datasets/hateful_memes/fine_grained/with_features.yaml 4 | 5 | training: 6 | find_unused_parameters: false 7 | batch_size: 128 8 | max_updates: 10000 9 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/hateful_pc_attack.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./multilabel.yaml 3 | - configs/datasets/hateful_memes/fine_grained/hateful_pc_attack.yaml 4 | 5 | model_config: 6 | visual_bert: 7 | num_labels: 17 8 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/multilabel.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | visual_bert: 6 | training_head_type: classification 7 | num_labels: 9 8 | losses: 9 | - type: 
logit_bce 10 | 11 | evaluation: 12 | metrics: 13 | - accuracy 14 | - multilabel_macro_f1 15 | - multilabel_micro_f1 16 | 17 | training: 18 | find_unused_parameters: false 19 | early_stop: 20 | criteria: hateful_memes/multilabel_micro_f1 21 | minimize: false 22 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/pc_attack.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./multilabel.yaml 3 | - configs/datasets/hateful_memes/fine_grained/pc_attack.yaml 4 | 5 | model_config: 6 | visual_bert: 7 | num_labels: 15 8 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/protected_groups.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./multilabel.yaml 3 | - configs/datasets/hateful_memes/fine_grained/protected_groups.yaml 4 | 5 | model_config: 6 | visual_bert: 7 | num_labels: 7 8 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/conceptnet_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/cn_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_cn.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_cn.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_cn.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/dbpedia_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/db_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_db.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_db.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_db.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/haspart_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/hp_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_hp.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_hp.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_hp.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/okvqav10.yaml: -------------------------------------------------------------------------------- 1 | 
includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 8 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10.pth.tar 9 | okvqa_v_mode: "v1.0-121" 10 | old_graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab.pth.tar 11 | ans_translation_file: okvqa/defaults/annotations/annotations/ans_vocab_tx.pth.tar 12 | num_labels: 2253 13 | num_labels: 2253 14 | dataset_config: 15 | okvqa: 16 | processors: 17 | answer_processor: 18 | params: 19 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 20 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10.pth.tar 21 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/okvqav10_fromfullpretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 8 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10_fp.pth.tar 9 | okvqa_v_mode: "v1.0-121-mc" 10 | old_graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab.pth.tar 11 | ans_translation_file: okvqa/defaults/annotations/annotations/ans_vocab_tx.pth.tar 12 | num_labels: 2253 13 | num_labels: 2253 14 | dataset_config: 15 | okvqa: 16 | processors: 17 | answer_processor: 18 | params: 19 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 20 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10_fp.pth.tar 21 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/randomgraph.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/random_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_random.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_random.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_random.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_cnonly.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./conceptnet_only.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - 
okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_dbonly.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./dbpedia_only.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_hponly.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./haspart_only.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_okvqav10.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./okvqav10.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval_v10.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_okvqav10_fromfullpretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./okvqav10_fromfullpretrain.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval_v10.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_random.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./randomgraph.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_vgonly.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./visualgenome_only.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/visualgenome_only.yaml: 
-------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/vg_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_vg.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_vg.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_vg.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/visual_bert/masked_coco/okvqa_safe.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: adam_w 3 | params: 4 | lr: 5e-5 5 | eps: 1e-8 6 | 7 | scheduler: 8 | type: warmup_cosine 9 | params: 10 | num_warmup_steps: 2000 11 | num_training_steps: 88000 12 | 13 | dataset_config: 14 | masked_coco: 15 | return_features_info: true 16 | features: 17 | train: 18 | - okvqa/defaults/features/features_fc6/COCO_trainval2014.lmdb 19 | val: 20 | - okvqa/defaults/features/features_fc6/COCO_trainval2014.lmdb 21 | test: 22 | - okvqa/defaults/features/features_fc6/COCO_trainval2014.lmdb 23 | 24 | training: 25 | find_unused_parameters: true 26 | batch_size: 56 27 | lr_scheduler: true 28 | # Don't forget to update schedule_attributes if you update this 29 | max_updates: 88000 30 | -------------------------------------------------------------------------------- /projects/krisp/configs/visual_bert/okvqa/defaults_v10.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | visual_bert: 6 | num_labels: 2253 7 | 8 | dataset_config: 9 | okvqa: 10 | processors: 11 | answer_processor: 12 | params: 13 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 14 | -------------------------------------------------------------------------------- /projects/krisp/configs/visual_bert/okvqa/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/visual_bert/okvqa/train_val_okvqav10.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults_v10.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval_v10.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/requirements.txt: -------------------------------------------------------------------------------- 1 | networkx 2 | torch_geometric 3 | gensim 4 | -------------------------------------------------------------------------------- /projects/lorra/configs/vqa2/train_val.yaml:
-------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | # Use soft copy 5 | dataset_config: 6 | vqa2_train_val: 7 | use_ocr: true 8 | processors: 9 | context_processor: 10 | type: fasttext 11 | params: 12 | download_initially: true 13 | max_length: 50 14 | model_file: wiki.en.bin 15 | answer_processor: 16 | type: soft_copy_answer 17 | params: 18 | vocab_file: vqa2/defaults/extras/vocabs/answers_vqa.txt 19 | preprocessor: 20 | type: simple_word 21 | params: {} 22 | context_preprocessor: 23 | type: simple_word 24 | params: {} 25 | max_length: 50 26 | num_answers: 10 27 | -------------------------------------------------------------------------------- /projects/lorra/configs/vqa2/train_val_resnet_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2_train_val: 6 | use_images: false 7 | use_features: true 8 | zoo_requirements: 9 | - coco.resnet152 10 | - vqa2.defaults 11 | features: 12 | train: 13 | - coco/resnet152/features/trainval2014.lmdb 14 | - coco/resnet152/features/trainval2014.lmdb 15 | val: 16 | - coco/resnet152/features/trainval2014.lmdb 17 | test: 18 | - coco/resnet152/features/test2015.lmdb 19 | annotations: 20 | train: 21 | - vqa2/defaults/annotations/imdb_train2014.npy 22 | - vqa2/defaults/annotations/imdb_valminusminival2014.npy 23 | val: 24 | - vqa2/defaults/annotations/imdb_minival2014.npy 25 | 26 | model_config: 27 | lorra: 28 | image_feature_encodings: 29 | - type: default 30 | params: {} 31 | -------------------------------------------------------------------------------- /projects/lxmert/configs/coco/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../defaults.yaml 3 | - ./masked.yaml 4 | -------------------------------------------------------------------------------- /projects/lxmert/configs/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/lxmert/defaults.yaml 3 | 4 | optimizer: 5 | type: adam_w 6 | params: 7 | lr: 1e-4 8 | eps: 1e-8 9 | 10 | training: 11 | seed: 9595 12 | batch_size: 4 13 | lr_scheduler: false 14 | find_unused_parameters: true 15 | use_warmup: true 16 | warmup_factor: 0.05 17 | warmup_iterations: 1000 18 | max_epochs: 20 19 | max_updates: null 20 | pin_memory: true 21 | 22 | 23 | evaluation: 24 | metrics: 25 | - vqa_accuracy 26 | -------------------------------------------------------------------------------- /projects/lxmert/configs/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - coco/masked.yaml 4 | - gqa/masked.yaml 5 | - visual_genome/masked.yaml 6 | - vqa2/masked.yaml 7 | - configs/models/lxmert/defaults.yaml 8 | -------------------------------------------------------------------------------- /projects/lxmert/configs/visual_genome/masked.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_visual_genome: 3 | use_features: true 4 | add_answer: true 5 | max_features: 36 6 | features: 7 | train: 8 | - visual_genome/detectron_fix_100/fc6/,visual_genome/resnet152/ 9 | - visual_genome/detectron_fix_100/fc6/,visual_genome/resnet152/ 10 | annotations: 11 | train: 12 | - imdb/visual_genome/vg_question_answers.jsonl 13 | - imdb/visual_genome/vg_question_answers_placeholder.jsonl 14 | 
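The lxmert `pretrain.yaml` that follows pairs a `warmup_linear` scheduler (1000 warmup steps) with `max_updates: 11000`. As a rough sketch of what such a schedule computes (assuming the usual linear warmup followed by linear decay; the exact MMF implementation may differ):

```python
def warmup_linear_multiplier(step: int, warmup: int, total: int) -> float:
    """LR multiplier: ramp 0 -> 1 over `warmup` steps, then decay to 0 at `total`."""
    if step < warmup:
        return step / max(1, warmup)
    return max(0.0, (total - step) / max(1, total - warmup))


# With the values from the config below, this prints 0.0, 0.5, 1.0, 0.5, 0.0:
for step in (0, 500, 1000, 6000, 11000):
    print(step, warmup_linear_multiplier(step, warmup=1000, total=11000))
```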
-------------------------------------------------------------------------------- /projects/lxmert/configs/visual_genome/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./masked.yaml 3 | 4 | optimizer: 5 | type: adam_w 6 | params: 7 | lr: 1e-4 8 | eps: 1e-8 9 | 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 1000 14 | num_training_steps: ${training.max_updates} 15 | 16 | training: 17 | batch_size: 480 18 | lr_scheduler: true 19 | # Don't forget to update schedule_attributes if you update this 20 | max_updates: 11000 21 | -------------------------------------------------------------------------------- /projects/lxmert/configs/vqa2/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../defaults.yaml 3 | - ./masked.yaml 4 | -------------------------------------------------------------------------------- /projects/m4c/README.md: -------------------------------------------------------------------------------- 1 | # Iterative Answer Prediction with Pointer-Augmented Multimodal Transformers for TextVQA 2 | 3 | This repository contains the code for the M4C model from the following paper, released as part of MMF: 4 | 5 | * R. Hu, A. Singh, T. Darrell, M. Rohrbach, *Iterative Answer Prediction with Pointer-Augmented Multimodal Transformers for TextVQA*. In CVPR, 2020 ([PDF](https://arxiv.org/pdf/1911.06258.pdf)) 6 | ``` 7 | @inproceedings{hu2020iterative, 8 | title={Iterative Answer Prediction with Pointer-Augmented Multimodal Transformers for TextVQA}, 9 | author={Hu, Ronghang and Singh, Amanpreet and Darrell, Trevor and Rohrbach, Marcus}, 10 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 11 | year={2020} 12 | } 13 | ``` 14 | 15 | Please see [https://mmf.sh/docs/projects/m4c](https://mmf.sh/docs/projects/m4c) for more details on how to use the M4C model.
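As with the other projects, training follows the standard `mmf_run` invocation; a hedged example for TextVQA (the config path is the `defaults.yaml` referenced by the configs below, and the `run_type` value is illustrative):

```
mmf_run config=projects/m4c/configs/textvqa/defaults.yaml run_type=train_val dataset=textvqa model=m4c
```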
16 | -------------------------------------------------------------------------------- /projects/m4c/configs/textvqa/joint_with_stvqa.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | textvqa: 6 | use_images: false 7 | use_features: true 8 | zoo_requirements: 9 | - textvqa.defaults 10 | - textvqa.ocr_en 11 | - stvqa.defaults 12 | - stvqa.ocr_en 13 | features: 14 | train: 15 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 16 | - stvqa/defaults/features/detectron.lmdb,stvqa/ocr_en/features/ocr_en_frcn_features.lmdb 17 | annotations: 18 | train: 19 | - textvqa/defaults/annotations/imdb_train_ocr_en.npy 20 | - stvqa/defaults/annotations/imdb_subtrain.npy 21 | -------------------------------------------------------------------------------- /projects/m4c/configs/textvqa/ocr_ml.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | textvqa: 6 | zoo_requirements: 7 | - textvqa.defaults 8 | - textvqa.ocr_ml 9 | features: 10 | train: 11 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_ml/features/ocr_ml_frcn_features.lmdb 12 | val: 13 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_ml/features/ocr_ml_frcn_features.lmdb 14 | test: 15 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_ml/features/ocr_ml_frcn_features.lmdb 16 | 17 | annotations: 18 | train: 19 | - textvqa/defaults/annotations/imdb_train_ocr_ml.npy 20 | val: 21 | - textvqa/defaults/annotations/imdb_val_ocr_ml.npy 22 | test: 23 | - textvqa/defaults/annotations/imdb_test_ocr_ml.npy 24 | -------------------------------------------------------------------------------- /projects/m4c/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /projects/m4c_captioner/README.md: -------------------------------------------------------------------------------- 1 | # TextCaps: a Dataset for Image Captioning with Reading Comprehension 2 | 3 | This repository contains the code for the M4C-Captioner model from the following paper, released as part of MMF: 4 | 5 | * O. Sidorov, R. Hu, M. Rohrbach, A. Singh, *TextCaps: a Dataset for Image Captioning with Reading Comprehension*. In ECCV, 2020 ([PDF](https://arxiv.org/pdf/2003.12462.pdf)) 6 | ``` 7 | @inproceedings{sidorov2019textcaps, 8 | title={TextCaps: a Dataset for Image Captioning with Reading Comprehension}, 9 | author={Sidorov, Oleksii and Hu, Ronghang and Rohrbach, Marcus and Singh, Amanpreet}, 10 | booktitle={European Conference on Computer Vision}, 11 | year={2020} 12 | } 13 | ``` 14 | 15 | Please see [https://mmf.sh/docs/projects/m4c_captioner](https://mmf.sh/docs/projects/m4c_captioner) for more details on how to use the M4C-Captioner model.
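Caption prediction can reuse the `predict` entrypoint shown earlier in `mmf_cli/predict.py`, which appends `evaluation.predict=true` and delegates to `run()`. A minimal sketch, assuming the same `sys.argv` override format as the CLI (the config path and values are illustrative):

```python
import sys

from mmf_cli.predict import predict

# predict() extends sys.argv with evaluation.predict=true, then calls run().
sys.argv = [
    "mmf_predict",
    "config=projects/m4c_captioner/configs/m4c_captioner/textcaps/defaults.yaml",
    "run_type=test",
    "dataset=textcaps",
    "model=m4c_captioner",
]
predict()
```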
16 | -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/butd/textcaps: -------------------------------------------------------------------------------- 1 | ../../../butd/configs/textcaps -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/m4c_captioner/coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/coco/ocr_en.yaml 3 | optimizer: 4 | params: 5 | eps: 1.0e-08 6 | lr: 1e-4 7 | weight_decay: 0 8 | type: Adam 9 | 10 | evaluation: 11 | metrics: 12 | - textcaps_bleu4 13 | 14 | training: 15 | clip_norm_mode: all 16 | clip_gradients: true 17 | max_grad_l2_norm: 0.25 18 | lr_scheduler: true 19 | lr_steps: 20 | - 14000 21 | - 19000 22 | lr_ratio: 0.1 23 | use_warmup: true 24 | warmup_factor: 0.2 25 | warmup_iterations: 1000 26 | max_updates: 24000 27 | batch_size: 128 28 | num_workers: 8 29 | early_stop: 30 | criteria: coco/textcaps_bleu4 31 | minimize: false 32 | -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/m4c_captioner/coco/eval_on_textcaps.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | textcaps: 6 | zoo_requirements: 7 | - textvqa.defaults 8 | - textvqa.ocr_en 9 | - textcaps.defaults 10 | use_images: false 11 | use_features: true 12 | features: 13 | val: 14 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 15 | test: 16 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 17 | annotations: 18 | val: 19 | - textcaps/defaults/annotations/imdb_val_filtered_by_image_id.npy # only one sample per image_id 20 | test: 21 | - textcaps/defaults/annotations/imdb_test_filtered_by_image_id.npy # only one sample per image_id 22 | -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/m4c_captioner/textcaps/with_caffe2_feat.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | textcaps: 6 | zoo_requirements: 7 | - textvqa.caffe2 8 | - textvqa.ocr_en 9 | - textcaps.defaults 10 | features: 11 | train: 12 | - textvqa/caffe2/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 13 | val: 14 | - textvqa/caffe2/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 15 | test: 16 | - textvqa/caffe2/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 17 | -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/m4c_captioner/textcaps/without_ocr.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | dataset_config: 4 | textcaps: 5 | use_ocr: false # remove all the OCRs from each image 6 | -------------------------------------------------------------------------------- /projects/m4c_captioner/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | -------------------------------------------------------------------------------- /projects/mmbt/configs/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmbt/classification.yaml 3 | - configs/datasets/hateful_memes/bert.yaml 4 | 5 | scheduler: 6 | type: warmup_linear 7 | params: 8 | num_warmup_steps: 2000 9 | num_training_steps: ${training.max_updates} 10 | 11 | optimizer: 12 | type: adam_w 13 | params: 14 | lr: 1e-5 15 | eps: 1e-8 16 | 17 | evaluation: 18 | metrics: 19 | - accuracy 20 | - binary_f1 21 | - roc_auc 22 | 23 | training: 24 | batch_size: 32 25 | lr_scheduler: true 26 | max_updates: 22000 27 | early_stop: 28 | criteria: hateful_memes/roc_auc 29 | minimize: false 30 | 31 | checkpoint: 32 | pretrained_state_mapping: 33 | bert: bert 34 | -------------------------------------------------------------------------------- /projects/mmbt/configs/hateful_memes/hateful_with_refiner.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmbt/classification.yaml 3 | - configs/datasets/hateful_memes/bert.yaml 4 | 5 | scheduler: 6 | type: warmup_linear 7 | params: 8 | num_warmup_steps: 2000 9 | num_training_steps: ${training.max_updates} 10 | 11 | optimizer: 12 | type: adam_w 13 | params: 14 | lr: 1e-5 15 | eps: 1e-8 16 | 17 | evaluation: 18 | metrics: 19 | - accuracy 20 | - binary_f1 21 | - roc_auc 22 | 23 | training: 24 | batch_size: 32 25 | lr_scheduler: true 26 | max_updates: 22000 27 | early_stop: 28 | criteria: hateful_memes/roc_auc 29 | minimize: false 30 | 31 | checkpoint: 32 | pretrained_state_mapping: 33 | bert: bert 34 | -------------------------------------------------------------------------------- /projects/mmbt/configs/hateful_memes/with_features.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - configs/models/mmbt/with_features.yaml 4 | - configs/datasets/hateful_memes/with_features.yaml 5 | 6 | optimizer: 7 | type: adam_w 8 | params: 9 | lr: 5e-5 10 | eps: 1e-8 11 | -------------------------------------------------------------------------------- /projects/mmbt/configs/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmbt/pretrain.yaml 3 | - configs/models/mmbt/with_features.yaml 4 | 5 | scheduler: 6 | type: warmup_linear 7 | params: 8 | num_warmup_steps: 2000 9 | num_training_steps: ${training.max_updates} 10 | 11 | optimizer: 12 | type: adam_w 13 | params: 14 | lr: 5e-5 15 | eps: 1e-8 16 | 17 | training: 18 | batch_size: 128 19 | lr_scheduler: true 20 | max_updates: 22000 21 | 22 | checkpoint: 23 | pretrained_state_mapping: 24 | bert: bert 25 | -------------------------------------------------------------------------------- /projects/mmbt/configs/mmimdb/with_features.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - configs/models/mmbt/with_features.yaml 4 | - configs/datasets/mmimdb/with_features.yaml 5 | -------------------------------------------------------------------------------- /projects/mmbt/configs/okvqa/with_images.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmbt/classification.yaml 3 | 4 | scheduler: 5 | type: warmup_linear 6 | params: 7 | num_warmup_steps: 2000 8 | 
num_training_steps: ${training.max_updates} 9 | 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | text_processor: 14 | type: bert_tokenizer 15 | params: 16 | tokenizer_config: 17 | type: bert-base-uncased 18 | params: 19 | do_lower_case: true 20 | mask_probability: 0 21 | max_seq_length: 128 22 | 23 | model_config: 24 | mmbt: 25 | losses: 26 | - logit_bce 27 | num_labels: 2253 28 | 29 | optimizer: 30 | type: adam_w 31 | params: 32 | lr: 1e-5 33 | eps: 1e-8 34 | 35 | evaluation: 36 | metrics: 37 | - vqa_accuracy 38 | 39 | training: 40 | batch_size: 32 41 | lr_scheduler: true 42 | max_updates: 22000 43 | early_stop: 44 | criteria: okvqa/vqa_accuracy 45 | minimize: false 46 | 47 | checkpoint: 48 | pretrained_state_mapping: 49 | bert: bert 50 | -------------------------------------------------------------------------------- /projects/mmbt/configs/vqa2/with_raw_images.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - ../../../../mmf/configs/datasets/vqa2/with_raw_images.yaml 4 | -------------------------------------------------------------------------------- /projects/mmf_transformer/configs/charades/direct.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmf_transformer/with_audio_video.yaml 3 | 4 | model_config: 5 | mmf_transformer: 6 | heads: 7 | - type: mlp 8 | num_labels: 157 9 | 10 | optimizer: 11 | type: adam_w 12 | params: 13 | lr: 5e-5 14 | eps: 1e-8 15 | 16 | scheduler: 17 | type: warmup_cosine 18 | params: 19 | num_warmup_steps: 2000 20 | num_training_steps: 60000 21 | 22 | evaluation: 23 | metrics: 24 | - multilabel_micro_f1 25 | 26 | training: 27 | batch_size: 8 28 | lr_scheduler: true 29 | # Don't forget to update schedule_attributes if you update this 30 | max_updates: 60000 31 | find_unused_parameters: true 32 | early_stop: 33 | criteria: charades/multilabel_micro_f1 34 | minimize: false 35 | -------------------------------------------------------------------------------- /projects/mmf_transformer/configs/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/hateful_memes/bert.yaml 3 | 4 | model_config: 5 | mmf_transformer: 6 | training_head_type: classification 7 | num_labels: 2 8 | losses: 9 | - cross_entropy 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 2000 15 | num_training_steps: ${training.max_updates} 16 | 17 | optimizer: 18 | type: adam_w 19 | params: 20 | lr: 1e-5 21 | eps: 1e-8 22 | 23 | evaluation: 24 | metrics: 25 | - accuracy 26 | - binary_f1 27 | - roc_auc 28 | 29 | training: 30 | batch_size: 32 31 | lr_scheduler: true 32 | max_updates: 22000 33 | early_stop: 34 | criteria: hateful_memes/roc_auc 35 | minimize: false 36 | 37 | checkpoint: 38 | pretrained_state_mapping: 39 | pooler: pooler 40 | backend.transformer: backend.transformer 41 | backend.embeddings: backend.embeddings 42 | -------------------------------------------------------------------------------- /projects/mmf_transformer/configs/masked_coco/pretrain_itm.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | mmf_transformer: 6 | heads: 7 | - type: itm 8 | freeze: false 9 | lr_multiplier: 1.0 10 | # default for bert base 11 | hidden_size: 768 12 | 13 | dataset_config: 14 | masked_coco: 15 | return_features_info: true 16 | 
false_caption: true 17 | false_caption_probability: 0.1 18 | -------------------------------------------------------------------------------- /projects/mmf_transformer/localized_narratives/masked.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/localized_narratives/masked.yaml 3 | - configs/models/mmf_transformer/pretrain.yaml 4 | 5 | optimizer: 6 | type: adam_w 7 | params: 8 | lr: 5e-5 9 | eps: 1e-8 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 1000 14 | num_training_steps: 11000 15 | training: 16 | batch_size: 2 17 | lr_scheduler: true 18 | # Don't forget to update schedule_attributes if you update this 19 | max_updates: 11000 20 | -------------------------------------------------------------------------------- /projects/others/cnn_lstm/clevr/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: Adamax 3 | params: 4 | eps: 1.0e-08 5 | lr: 0.01 6 | weight_decay: 0 7 | 8 | evaluation: 9 | metrics: 10 | - accuracy 11 | 12 | training: 13 | batch_size: 128 14 | snapshot_interval: 6000 15 | early_stop: 16 | criteria: clevr/accuracy 17 | minimize: false 18 | -------------------------------------------------------------------------------- /projects/others/cnn_lstm/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | cnn_lstm: 3 | losses: 4 | - type: cross_entropy 5 | classifier: 6 | type: mlp 7 | params: 8 | in_dim: 190 9 | out_dim: 2 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 2000 15 | num_training_steps: ${training.max_updates} 16 | 17 | optimizer: 18 | type: adam_w 19 | params: 20 | lr: 5e-5 21 | eps: 1e-8 22 | 23 | evaluation: 24 | metrics: 25 | - accuracy 26 | - binary_f1 27 | - roc_auc 28 | 29 | training: 30 | batch_size: 480 31 | lr_scheduler: true 32 | max_updates: 60000 33 | early_stop: 34 | criteria: hateful_memes/roc_auc 35 | minimize: false 36 | -------------------------------------------------------------------------------- /projects/others/concat_bert/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/hateful_memes/bert.yaml 3 | 4 | model_config: 5 | concat_bert: 6 | classifier: 7 | type: mlp 8 | params: 9 | num_layers: 2 10 | losses: 11 | - type: cross_entropy 12 | 13 | scheduler: 14 | type: warmup_linear 15 | params: 16 | num_warmup_steps: 2000 17 | num_training_steps: ${training.max_updates} 18 | 19 | optimizer: 20 | type: adam_w 21 | params: 22 | lr: 1e-5 23 | eps: 1e-8 24 | 25 | evaluation: 26 | metrics: 27 | - accuracy 28 | - binary_f1 29 | - roc_auc 30 | 31 | training: 32 | batch_size: 64 33 | lr_scheduler: true 34 | max_updates: 22000 35 | early_stop: 36 | criteria: hateful_memes/roc_auc 37 | minimize: false 38 | 39 | checkpoint: 40 | pretrained_state_mapping: 41 | base: base 42 | -------------------------------------------------------------------------------- /projects/others/concat_bow/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | concat_bow: 3 | classifier: 4 | type: mlp 5 | params: 6 | num_layers: 2 7 | losses: 8 | - type: cross_entropy 9 | 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 2000 14 | num_training_steps: ${training.max_updates} 15 | 16 | optimizer: 17 | type: 
adam_w 18 | params: 19 | lr: 5e-5 20 | eps: 1e-8 21 | 22 | evaluation: 23 | metrics: 24 | - accuracy 25 | - binary_f1 26 | - roc_auc 27 | 28 | training: 29 | batch_size: 32 30 | lr_scheduler: true 31 | max_updates: 22000 32 | early_stop: 33 | criteria: hateful_memes/roc_auc 34 | minimize: false 35 | 36 | checkpoint: 37 | pretrained_state_mapping: 38 | base: base 39 | -------------------------------------------------------------------------------- /projects/others/late_fusion/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/hateful_memes/bert.yaml 3 | 4 | model_config: 5 | late_fusion: 6 | modal_classifier: 7 | type: mlp 8 | params: 9 | num_layers: 2 10 | text_classifier: 11 | type: mlp 12 | params: 13 | num_layers: 2 14 | losses: 15 | - type: cross_entropy 16 | 17 | scheduler: 18 | type: warmup_linear 19 | params: 20 | num_warmup_steps: 2000 21 | num_training_steps: ${training.max_updates} 22 | 23 | optimizer: 24 | type: adam_w 25 | params: 26 | lr: 5e-5 27 | eps: 1e-8 28 | 29 | evaluation: 30 | metrics: 31 | - accuracy 32 | - binary_f1 33 | - roc_auc 34 | 35 | training: 36 | batch_size: 64 37 | lr_scheduler: true 38 | max_updates: 22000 39 | early_stop: 40 | criteria: hateful_memes/roc_auc 41 | minimize: false 42 | 43 | checkpoint: 44 | pretrained_state_mapping: 45 | base: base 46 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_coco: 3 | return_features_info: true 4 | 5 | optimizer: 6 | type: adam_w 7 | params: 8 | lr: 5e-5 9 | eps: 1e-8 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 1000 15 | num_training_steps: 11000 16 | 17 | training: 18 | batch_size: 480 19 | lr_scheduler: true 20 | # Don't forget to update schedule_attributes if you update this 21 | max_updates: 11000 22 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_coco/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_coco/pretrain_joint_vqa2.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - ../masked_vqa2/defaults.yaml 4 | 5 | model_config: 6 | mmf_bert: 7 | training_head_type: pretraining,vqa 8 | 9 | scheduler: 10 | type: warmup_linear 11 | params: 12 | num_warmup_steps: 3000 13 | num_training_steps: 33000 14 | 15 | training: 16 | max_updates: 34000 17 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | return_features_info: true 7 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_conceptual_captions/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 
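Many of the configs in this section tie `num_training_steps` to `${training.max_updates}`. MMF configs are built on OmegaConf, so this is plain OmegaConf interpolation; a small self-contained sketch of the mechanism (values are illustrative):

```python
from omegaconf import OmegaConf

# The scheduler resolves training.max_updates through interpolation, so
# updating max_updates in one place keeps both values in sync.
cfg = OmegaConf.create(
    """
training:
  max_updates: 22000
scheduler:
  type: warmup_linear
  params:
    num_warmup_steps: 2000
    num_training_steps: ${training.max_updates}
"""
)
print(cfg.scheduler.params.num_training_steps)  # 22000
```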
-------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | annotations: 7 | train: 8 | - vqa2/defaults/annotations/imdb_train2014_len_coco.npy 9 | return_features_info: true 10 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_vqa2/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/visual_entailment/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | mmf_bert: 3 | training_head_type: visual_entailment 4 | losses: 5 | - type: cross_entropy 6 | 7 | dataset_config: 8 | visual_entailment: 9 | return_features_info: true 10 | processors: 11 | text_processor: 12 | type: bert_tokenizer 13 | params: 14 | tokenizer_config: 15 | type: bert-base-uncased 16 | params: 17 | do_lower_case: true 18 | mask_probability: 0 19 | max_seq_length: 128 20 | 21 | optimizer: 22 | type: adam_w 23 | params: 24 | lr: 5e-5 25 | eps: 1e-8 26 | 27 | scheduler: 28 | type: warmup_linear 29 | params: 30 | num_warmup_steps: 6000 31 | num_training_steps: 60000 32 | 33 | evaluation: 34 | metrics: 35 | - accuracy 36 | 37 | training: 38 | early_stop: 39 | criteria: visual_entailment/accuracy 40 | minimize: false 41 | -------------------------------------------------------------------------------- /projects/others/unimodal/configs/hateful_memes/bert.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./text.yaml 3 | - configs/datasets/hateful_memes/bert.yaml 4 | - configs/models/unimodal/bert.yaml 5 | 6 | model_config: 7 | unimodal_text: 8 | classifier: 9 | type: mlp 10 | params: 11 | in_dim: 768 12 | num_layers: 2 13 | 14 | training: 15 | batch_size: 128 16 | -------------------------------------------------------------------------------- /projects/others/unimodal/configs/hateful_memes/image.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | unimodal_image: 3 | classifier: 4 | type: mlp 5 | params: 6 | num_layers: 2 7 | losses: 8 | - type: cross_entropy 9 | 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 2000 14 | num_training_steps: ${training.max_updates} 15 | 16 | optimizer: 17 | type: adam_w 18 | params: 19 | lr: 1e-5 20 | eps: 1e-8 21 | 22 | evaluation: 23 | metrics: 24 | - accuracy 25 | - binary_f1 26 | - roc_auc 27 | 28 | training: 29 | batch_size: 32 30 | lr_scheduler: true 31 | max_updates: 22000 32 | early_stop: 33 | criteria: hateful_memes/roc_auc 34 | minimize: false 35 | 36 | checkpoint: 37 | pretrained_state_mapping: 38 | base: base 39 | -------------------------------------------------------------------------------- /projects/others/unimodal/configs/hateful_memes/text.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | unimodal_text: 3 | classifier: 4 | type: mlp 5 | params: 6 | num_layers: 2 7 | losses: 8 | - type: cross_entropy 9 | 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 2000 14 | 
num_training_steps: ${training.max_updates} 15 | 16 | optimizer: 17 | type: adam_w 18 | params: 19 | lr: 5e-5 20 | eps: 1e-8 21 | 22 | evaluation: 23 | metrics: 24 | - accuracy 25 | - binary_f1 26 | - roc_auc 27 | 28 | training: 29 | batch_size: 32 30 | lr_scheduler: true 31 | max_updates: 22000 32 | early_stop: 33 | criteria: hateful_memes/roc_auc 34 | minimize: false 35 | 36 | checkpoint: 37 | pretrained_state_mapping: 38 | base: base 39 | -------------------------------------------------------------------------------- /projects/others/unimodal/configs/hateful_memes/with_features.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./image.yaml 3 | - configs/datasets/hateful_memes/with_features.yaml 4 | - configs/models/unimodal/with_features.yaml 5 | 6 | 7 | optimizer: 8 | type: adam_w 9 | params: 10 | lr: 5e-5 11 | eps: 1e-8 12 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_coco/defaults.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_coco/fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | annotations: 7 | train: 8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_coco/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_coco/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_coco/ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | annotations: 7 | train: 8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_conceptual_captions/defaults.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_conceptual_captions/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/full_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | annotations: 10 | train: 11 | - 
cc/coco_generated/annotations/train_all.npy 12 | val: 13 | - cc/coco_generated/annotations/val.npy 14 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/half.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_mid.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/half_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | annotations: 10 | train: 11 | - cc/coco_generated/annotations/train_mid.npy 12 | val: 13 | - cc/coco_generated/annotations/val.npy 14 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | annotations: 10 | train: 11 | - cc/coco_generated/annotations/train_small.npy 12 | val: 13 | - cc/coco_generated/annotations/val.npy 14 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small_fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small_ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_vqa2/defaults.yaml 
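The `full`/`half`/`small`/`ten_pc`/`fifty_pc` variants above all follow one pattern: include the full pretraining config, then override only `annotations.train` with a subsampled imdb file. Roughly, `includes` merges the including file on top of the included ones, so its keys win and list values are replaced wholesale; a sketch of that merge semantics with plain OmegaConf (MMF's actual loader in `mmf/utils/configuration.py` additionally resolves the include paths):

```
from omegaconf import OmegaConf

# Sketch of the override pattern: the including config is merged on top of
# the included one; list values such as annotations.train are replaced, not
# concatenated, so only the subsampled annotation file remains.
base = OmegaConf.create({"dataset_config": {"masked_vqa2": {"annotations": {
    "train": ["vqa2/defaults/annotations/imdb_train2014_len_coco.npy"]}}}})
override = OmegaConf.create({"dataset_config": {"masked_vqa2": {"annotations": {
    "train": ["vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy"]}}}})
merged = OmegaConf.merge(base, override)
assert list(merged.dataset_config.masked_vqa2.annotations.train) == [
    "vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy"
]
```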
-------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_vqa2/fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_vqa2/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | annotations: 7 | train: 8 | - vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_vqa2/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_vqa2/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_vqa2/ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_vqa2/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | return_features_info: true 7 | use_images: false 8 | use_features: true 9 | features: 10 | train: 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014_len_coco_10_pc.npy 15 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_coco/defaults.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | annotations: 7 | train: 8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_coco/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/full_train_val.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_coco/pretrain_train_val.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | annotations: 7 | train: 8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_conceptual_captions/defaults.yaml 
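Several of the configs above switch to precomputed region features with `use_features: true` and a list of `.lmdb` paths. These LMDB files are key-value databases of per-image features. The key layout and serialization sketched below are assumptions for illustration only; MMF's real reader lives in `mmf/datasets/databases/features_database.py`:

```
import lmdb
import pickle

# Rough, assumption-laden peek at a feature database: entries are assumed to
# be keyed by image id and to hold a pickled record of precomputed features.
env = lmdb.open("coco_trainval2014.lmdb", readonly=True, lock=False)
with env.begin() as txn:
    with txn.cursor() as cursor:
        for key, value in cursor:
            record = pickle.loads(value)  # serialization format is an assumption
            print(key.decode(), type(record))
            break
```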
-------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_conceptual_captions/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/full_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | annotations: 10 | train: 11 | - cc/coco_generated/annotations/train_all.npy 12 | val: 13 | - cc/coco_generated/annotations/val.npy 14 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/half.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_mid.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/half_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | 10 | annotations: 11 | train: 12 | - cc/coco_generated/annotations/train_mid.npy 13 | val: 14 | - cc/coco_generated/annotations/val.npy 15 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | 10 | annotations: 11 | train: 12 | - cc/coco_generated/annotations/train_small.npy 13 | val: 14 | - cc/coco_generated/annotations/val.npy 15 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small_fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | 
annotations: 7 | train: 8 | - cc/defaults/annotations/train_small_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small_ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_vqa2/defaults.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_vqa2/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | annotations: 7 | train: 8 | - vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_vqa2/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/full_train_val.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_vqa2/pretrain_train_val.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_vqa2/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | annotations: 7 | train: 8 | - vqa2/defaults/annotations/imdb_train2014_len_coco_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pythia/configs/masked_q_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./pythia.yaml 3 | 4 | evaluation: 5 | metrics: 6 | - accuracy 7 | 8 | training: 9 | early_stop: 10 | criteria: masked_q_vqa2/accuracy 11 | minimize: false 12 | -------------------------------------------------------------------------------- /projects/pythia/configs/textvqa/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/textvqa/with_resnet.yaml 3 | optimizer: 4 | type: Adamax 5 | params: 6 | lr: 0.005 7 | 8 | evaluation: 9 | metrics: 10 | - vqa_accuracy 11 | 12 | training: 13 | clip_norm_mode: all 14 | clip_gradients: false 15 | max_grad_l2_norm: 0.25 16 | lr_scheduler: true 17 | lr_steps: 18 | - 14000 19 | lr_ratio: 0.01 20 | use_warmup: true 21 | warmup_factor: 0.2 22 | warmup_iterations: 1000 23 | max_updates: 24000 24 | batch_size: 128 25 | num_workers: 7 26 | task_size_proportional_sampling: true 27 | early_stop: 28 | criteria: 
textvqa/vqa_accuracy 29 | minimize: false 30 | 31 | checkpoint: 32 | pretrained_state_mapping: 33 | text_embeddings: text_embeddings 34 | image_feature_encoders: image_feature_encoders 35 | image_feature_embeddings_list: image_feature_embeddings_list 36 | image_text_multi_modal_combine_layer: image_text_multi_modal_combine_layer 37 | -------------------------------------------------------------------------------- /projects/pythia/configs/visual_genome/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | visual_genome: 3 | return_scene_graph: false 4 | return_objects: false 5 | return_relationships: false 6 | return_features_info: false 7 | no_unk: true 8 | 9 | evaluation: 10 | metrics: 11 | - vqa_accuracy 12 | 13 | training: 14 | early_stop: 15 | criteria: visual_genome/vqa_accuracy 16 | minimize: false 17 | -------------------------------------------------------------------------------- /projects/pythia/configs/vizwiz/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: Adamax 3 | params: 4 | lr: 0.005 5 | 6 | evaluation: 7 | metrics: 8 | - vqa_accuracy 9 | 10 | training: 11 | clip_norm_mode: all 12 | clip_gradients: true 13 | max_grad_l2_norm: 0.25 14 | lr_scheduler: true 15 | lr_steps: 16 | - 14000 17 | lr_ratio: 0.01 18 | use_warmup: true 19 | warmup_factor: 0.2 20 | warmup_iterations: 1000 21 | max_updates: 24000 22 | batch_size: 128 23 | num_workers: 7 24 | task_size_proportional_sampling: true 25 | early_stop: 26 | criteria: vizwiz/vqa_accuracy 27 | minimize: false 28 | 29 | checkpoint: 30 | pretrained_state_mapping: 31 | word_embedding: word_embedding 32 | text_embeddings: text_embeddings 33 | image_feature_encoders: image_feature_encoders 34 | image_feature_embeddings_list: image_feature_embeddings_list 35 | image_text_multi_modal_combine_layer: image_text_multi_modal_combine_layer 36 | -------------------------------------------------------------------------------- /projects/pythia/configs/vqa2/debug.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/trainval2014.lmdb 11 | val: 12 | - coco/defaults/features/trainval2014.lmdb 13 | annotations: 14 | train: 15 | - vqa2/defaults/annotations/imdb_debug.npy 16 | val: 17 | - vqa2/defaults/annotations/imdb_debug.npy 18 | -------------------------------------------------------------------------------- /projects/pythia/configs/vqa2/resnet_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | zoo_requirements: 9 | - coco.resnet152 10 | - vqa2.defaults 11 | features: 12 | train: 13 | - coco/resnet152/features/trainval2014.lmdb 14 | val: 15 | - coco/resnet152/features/trainval2014.lmdb 16 | test: 17 | - coco/resnet152/features/test2015.lmdb 18 | model_config: 19 | pythia: 20 | image_feature_encodings: 21 | - type: default 22 | params: {} 23 | -------------------------------------------------------------------------------- /projects/pythia/configs/vqa2/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: 
true 8 | features: 9 | train: 10 | - coco/defaults/features/trainval2014.lmdb,coco/resnet152/features/trainval2014.lmdb 11 | - coco/defaults/features/trainval2014.lmdb,coco/resnet152/features/trainval2014.lmdb 12 | val: 13 | - coco/defaults/features/trainval2014.lmdb,coco/resnet152/features/trainval2014.lmdb 14 | test: 15 | - coco/defaults/features/test2015.lmdb,coco/resnet152/features/test2015.lmdb 16 | annotations: 17 | train: 18 | - vqa2/defaults/annotations/imdb_train2014.npy 19 | - vqa2/defaults/annotations/imdb_val2014.npy 20 | val: 21 | - vqa2/defaults/annotations/imdb_val2014.npy 22 | -------------------------------------------------------------------------------- /projects/pythia/configs/vqa2/train_val_resnet_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./resnet_only.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/resnet152/features/trainval2014.lmdb 11 | - coco/resnet152/features/trainval2014.lmdb 12 | val: 13 | - coco/resnet152/features/trainval2014.lmdb 14 | test: 15 | - coco/resnet152/features/test2015.lmdb 16 | annotations: 17 | train: 18 | - vqa2/defaults/annotations/imdb_train2014.npy 19 | - vqa2/defaults/annotations/imdb_valminusminival2014.npy 20 | val: 21 | - vqa2/defaults/annotations/imdb_minival2014.npy 22 | -------------------------------------------------------------------------------- /projects/unit/README.md: -------------------------------------------------------------------------------- 1 | # UniT: Multimodal Multitask Learning with a Unified Transformer 2 | 3 | This repository contains the code for the UniT model from the following paper, released as part of MMF: 4 | 5 | - R. Hu, A. Singh. _UniT: Multimodal Multitask Learning with a Unified Transformer_. arXiv preprint arXiv:2102.10772, 2021 ([PDF](https://arxiv.org/pdf/2102.10772.pdf)) 6 | 7 | ``` 8 | @article{hu2021unit, 9 | title={UniT: Multimodal multitask learning with a unified transformer}, 10 | author={Hu, Ronghang and Singh, Amanpreet}, 11 | journal={arXiv preprint arXiv:2102.10772}, 12 | year={2021} 13 | } 14 | ``` 15 | 16 | Please see [https://mmf.sh/docs/projects/unit](https://mmf.sh/docs/projects/unit) for more details on how to use the UniT model.
17 | -------------------------------------------------------------------------------- /projects/unit/configs/all_8_datasets/separate_dec.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | share_decoders: false 8 | 9 | optimizer: 10 | type: adam_w # HuggingFace transformer's AdamW 11 | -------------------------------------------------------------------------------- /projects/unit/configs/all_8_datasets/shared_dec_without_task_embedding.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | use_task_embedding_in_img_encoder: false 8 | use_task_embedding_in_lang_encoder: false 9 | -------------------------------------------------------------------------------- /projects/unit/configs/coco/single_task_without_task_embedding.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./single_task.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | use_task_embedding_in_img_encoder: false 8 | use_task_embedding_in_lang_encoder: false 9 | -------------------------------------------------------------------------------- /projects/unit/configs/coco_vg_vqa2/separate_dec.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | share_decoders: false 8 | 9 | optimizer: 10 | type: adam_w # HuggingFace transformer's AdamW 11 | -------------------------------------------------------------------------------- /projects/unit/configs/coco_vqa2/separate_dec.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | share_decoders: false 8 | 9 | optimizer: 10 | type: adam_w # HuggingFace transformer's AdamW 11 | -------------------------------------------------------------------------------- /projects/unit/configs/vg_vqa2/separate_dec.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | share_decoders: false 8 | 9 | optimizer: 10 | type: adam_w # HuggingFace transformer's AdamW 11 | -------------------------------------------------------------------------------- /projects/unit/configs/visual_entailment_dataset_cfg.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | visual_entailment: 3 | zoo_requirements: 4 | - visual_entailment.defaults 5 | - flickr30k.defaults 6 | use_features: false 7 | use_images: true 8 | processors: 9 | image_processor: 10 | type: torchvision_transforms 11 | params: 12 | transforms: 13 | - type: ResizeShortest 14 | params: 15 | min_size: 800 16 | max_size: 1333 17 | - ToTensor 18 | - type: Normalize 19 | params: 20 | mean: [0.485, 0.456, 0.406] 21 | std: [0.229, 0.224, 0.225] 22 | text_processor: 23 | type: bert_tokenizer 24 | params: 25 | tokenizer_config: 26 | type: bert-base-uncased 27 | params: 28 | do_lower_case: true 29 | mask_probability: 0 30 | max_seq_length: 25 31 | -------------------------------------------------------------------------------- /projects/uniter/README.md: -------------------------------------------------------------------------------- 1 | # UNITER 2 | 3 | This 
repository contains the code for the PyTorch implementation of the UNITER model, originally released in this [repo](https://github.com/ChenRocks/UNITER/). Please cite the following paper if you are using the UNITER model from MMF: 4 | 5 | * Chen, Y.-C., Li, L., Yu, L., Kholy, A. E., Ahmed, F., Gan, 6 | Z., Cheng, Y., and Liu, J. *UNITER: Universal image-text representation learning.* In European Conference on 7 | Computer Vision, 2020. ([arXiv](https://arxiv.org/pdf/1909.11740)) 8 | ``` 9 | @inproceedings{chen2020uniter, 10 | title={Uniter: Universal image-text representation learning}, 11 | author={Chen, Yen-Chun and Li, Linjie and Yu, Licheng and Kholy, Ahmed El and Ahmed, Faisal and Gan, Zhe and Cheng, Yu and Liu, Jingjing}, 12 | booktitle={ECCV}, 13 | year={2020} 14 | } 15 | ``` 16 | 17 | 18 | Please see [https://mmf.sh/docs/projects/uniter](https://mmf.sh/docs/projects/uniter) for more details on how to use the UNITER model. 19 | -------------------------------------------------------------------------------- /projects/vilbert/configs/hateful_memes/direct.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/vilbert/configs/hateful_memes/from_cc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | checkpoint: 5 | resume_pretrained: true 6 | resume_zoo: vilbert.pretrained.cc.original 7 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | vilbert: 3 | training_head_type: pretraining 4 | 5 | dataset_config: 6 | masked_coco: 7 | return_features_info: true 8 | use_image_feature_masks: true 9 | 10 | optimizer: 11 | type: adam_w 12 | params: 13 | lr: 5e-5 14 | eps: 1e-8 15 | 16 | scheduler: 17 | type: warmup_linear 18 | params: 19 | num_warmup_steps: 1000 20 | num_training_steps: 11000 21 | 22 | training: 23 | batch_size: 480 24 | lr_scheduler: true 25 | # Don't forget to update schedule_attributes if you update this 26 | max_updates: 11000 27 | find_unused_parameters: true 28 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_coco/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_coco/pretrain_train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | return_features_info: true 7 | use_images: false 8 | use_features: true 9 | features: 10 | train: 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | - coco/defaults/features/coco_trainval2014.lmdb 13 | annotations: 14 | train: 15 | - coco/defaults/annotations/imdb_karpathy_train_by_image.npy 16 | - coco/defaults/annotations/imdb_karpathy_val_by_image.npy 17 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | vilbert: 3 | training_head_type:
pretraining 4 | 5 | dataset_config: 6 | masked_conceptual_captions: 7 | return_features_info: true 8 | use_image_feature_masks: true 9 | 10 | optimizer: 11 | type: adam_w 12 | params: 13 | lr: 5e-5 14 | eps: 1e-8 15 | 16 | scheduler: 17 | type: warmup_linear 18 | params: 19 | num_warmup_steps: 1000 20 | num_training_steps: 11000 21 | 22 | training: 23 | batch_size: 480 24 | lr_scheduler: true 25 | # Don't forget to update schedule_attributes if you update this 26 | max_updates: 11000 27 | find_unused_parameters: true 28 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | vilbert: 3 | training_head_type: pretraining 4 | 5 | dataset_config: 6 | masked_vqa2: 7 | annotations: 8 | train: 9 | - vqa2/defaults/annotations/imdb_train2014.npy 10 | return_features_info: true 11 | use_image_feature_masks: true 12 | 13 | optimizer: 14 | type: adam_w 15 | params: 16 | lr: 5e-5 17 | eps: 1e-8 18 | 19 | scheduler: 20 | type: warmup_linear 21 | params: 22 | num_warmup_steps: 1000 23 | num_training_steps: 11000 24 | 25 | training: 26 | batch_size: 480 27 | lr_scheduler: true 28 | # Don't forget to update schedule_attributes if you update this 29 | max_updates: 11000 30 | find_unused_parameters: true 31 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_vqa2/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_vqa2/pretrain_train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/coco_trainval2014.lmdb 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014.npy 15 | - vqa2/defaults/annotations/imdb_val2014.npy 16 | return_features_info: true 17 | use_image_feature_masks: true 18 | -------------------------------------------------------------------------------- /projects/vilbert/configs/mmimdb/pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/projects/vilbert/configs/mmimdb/pretrain.yaml -------------------------------------------------------------------------------- /projects/vilbert/configs/vqa2/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/coco_trainval2014.lmdb 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014.npy 15 | - vqa2/defaults/annotations/imdb_val2014.npy 16 | 
return_features_info: true 17 | -------------------------------------------------------------------------------- /projects/vilt/README.md: -------------------------------------------------------------------------------- 1 | # ViLT 2 | 3 | This repository contains the code for the PyTorch implementation of the ViLT model, originally released in this [repo](https://github.com/dandelin/ViLT). Please cite the following paper if you are using the ViLT model from MMF: 4 | 5 | * Wonjae Kim, Bokyung Son, and Ildoo Kim. 2021. *ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision*. In the 38th International Conference on Machine Learning (ICML). ([arXiv](https://arxiv.org/pdf/2102.03334)) 6 | ``` 7 | @misc{kim2021vilt, 8 | title={ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision}, 9 | author={Wonjae Kim and Bokyung Son and Ildoo Kim}, 10 | year={2021}, 11 | eprint={2102.03334}, 12 | archivePrefix={arXiv}, 13 | primaryClass={stat.ML} 14 | } 15 | ``` 16 | 17 | Please see [https://mmf.sh/docs/projects/vilt](https://mmf.sh/docs/projects/vilt) for more details on how to use the ViLT model. 18 | -------------------------------------------------------------------------------- /projects/vilt/configs/vqa2/vit_b16_224.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/unit/configs/vqa2_dataset_cfg.yaml 3 | - ./defaults.yaml 4 | 5 | other_configs: 6 | image_w: 224 7 | image_h: 224 8 | hidden_dim: 768 9 | 10 | model_config: 11 | vilt: 12 | image_encoder: 13 | type: vit 14 | params: 15 | random_init: False 16 | pretrained_model_name: google/vit-base-patch16-224 17 | image_size: 18 | - ${other_configs.image_w} 19 | - ${other_configs.image_h} 20 | hidden_dim: ${other_configs.hidden_dim} 21 | pretrained_model: vit_base_patch16_224 22 | mlp_dim: 3072 23 | 24 | text_embeddings: 25 | type: vilt_text_embedding 26 | bert_model_name: bert-base-uncased 27 | hidden_dim: ${other_configs.hidden_dim} 28 | hidden_size: 768 29 | max_position_embeddings: 512 30 | random_init: False 31 | -------------------------------------------------------------------------------- /projects/vilt/configs/vqa2/vit_b32_384.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/unit/configs/vqa2_dataset_cfg.yaml 3 | - ./defaults.yaml 4 | 5 | other_configs: 6 | image_w: 384 7 | image_h: 384 8 | hidden_dim: 768 9 | 10 | model_config: 11 | vilt: 12 | image_encoder: 13 | type: vit 14 | params: 15 | random_init: False 16 | pretrained_model_name: google/vit-base-patch32-384 17 | image_size: 18 | - ${other_configs.image_w} 19 | - ${other_configs.image_h} 20 | hidden_dim: ${other_configs.hidden_dim} 21 | pretrained_model: vit_base_patch32_384 22 | mlp_dim: 3072 23 | 24 | text_embeddings: 25 | type: vilt_text_embedding 26 | bert_model_name: bert-base-uncased 27 | hidden_dim: ${other_configs.hidden_dim} 28 | hidden_size: 768 29 | max_position_embeddings: 512 30 | random_init: False 31 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/hateful_memes/direct.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | training: 5 | batch_size: 128 6 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/hateful_memes/from_coco.yaml:
-------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | checkpoint: 5 | resume_pretrained: true 6 | resume_zoo: visual_bert.pretrained.coco 7 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/localized_narratives/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | visual_bert: 3 | hidden_size: 768 4 | hidden_dropout_prob: 0.1 5 | training_head_type: classification 6 | num_labels: 3129 7 | 8 | dataset_config: 9 | masked_localized_narratives: 10 | return_features_info: true 11 | 12 | optimizer: 13 | type: adam_w 14 | params: 15 | lr: 5e-5 16 | eps: 1e-8 17 | 18 | scheduler: 19 | type: warmup_linear 20 | params: 21 | num_warmup_steps: 1000 22 | num_training_steps: 11000 23 | 24 | training: 25 | batch_size: 32 26 | lr_scheduler: true 27 | num_workers: 0 28 | # Don't forget to update schedule_attributes if you update this 29 | max_updates: 88000 30 | find_unused_parameters: true 31 | 32 | checkpoint: 33 | pretrained_state_mapping: 34 | model.bert: model.bert 35 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/localized_narratives/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../../mmf/configs/datasets/coco2017/masked.yaml 3 | - ../../../../mmf/configs/datasets/flickr30k/masked.yaml 4 | - ../../../../mmf/configs/datasets/localized_narratives/masked.yaml 5 | 6 | model_config: 7 | visual_bert: 8 | training_head_type: pretraining 9 | 10 | optimizer: 11 | type: adam_w 12 | params: 13 | lr: 5e-5 14 | eps: 1e-8 15 | 16 | scheduler: 17 | type: warmup_linear 18 | params: 19 | num_warmup_steps: 1000 20 | num_training_steps: 11000 21 | 22 | training: 23 | batch_size: 32 24 | lr_scheduler: true 25 | num_workers: 0 26 | # Don't forget to update schedule_attributes if you update this 27 | max_updates: 88000 28 | find_unused_parameters: true 29 | 30 | checkpoint: 31 | pretrained_state_mapping: 32 | model.bert: model.bert 33 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_coco: 3 | return_features_info: true 4 | 5 | optimizer: 6 | type: adam_w 7 | params: 8 | lr: 5e-5 9 | eps: 1e-8 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 1000 15 | num_training_steps: 11000 16 | 17 | training: 18 | batch_size: 480 19 | lr_scheduler: true 20 | # Don't forget to update schedule_attributes if you update this 21 | max_updates: 11000 22 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_coco/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_coco/pretrain_train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | return_features_info: true 7 | use_images: false 8 | use_features: true 9 | features: 10 | train: 11 | - coco/defaults/features/trainval2014.lmdb 12 | - 
coco/defaults/features/trainval2014.lmdb 13 | annotations: 14 | train: 15 | - coco/defaults/annotations/imdb_karpathy_train_by_image.npy 16 | - coco/defaults/annotations/imdb_karpathy_val_by_image.npy 17 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_conceptual_captions: 3 | return_features_info: true 4 | 5 | optimizer: 6 | type: adam_w 7 | params: 8 | lr: 5e-5 9 | eps: 1e-8 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 1000 15 | num_training_steps: 11000 16 | 17 | training: 18 | batch_size: 480 19 | lr_scheduler: true 20 | # Don't forget to update schedule_attributes if you update this 21 | max_updates: 11000 22 | find_unused_parameters: true 23 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_gqa/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: adam_w 3 | params: 4 | lr: 5e-5 5 | eps: 1e-8 6 | 7 | scheduler: 8 | type: warmup_linear 9 | params: 10 | num_warmup_steps: 2000 11 | num_training_steps: 88000 12 | 13 | training: 14 | batch_size: 480 15 | lr_scheduler: true 16 | # Don't forget to update schedule_attributes if you update this 17 | max_updates: 88000 18 | find_unused_parameters: true 19 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_sbu/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | visual_bert: 3 | bert_model_name: bert-base-uncased 4 | training_head_type: pretraining 5 | visual_embedding_dim: 2048 6 | special_visual_initialize: true 7 | hard_cap_seq_len: null 8 | cut_first: text 9 | embedding_strategy: plain 10 | bypass_transformer: false 11 | output_attentions: false 12 | output_hidden_states: false 13 | text_only: false 14 | random_initialize: false 15 | 16 | dataset_config: 17 | masked_sbu: 18 | return_features_info: true 19 | 20 | optimizer: 21 | type: adam_w 22 | params: 23 | lr: 5e-5 24 | eps: 1e-8 25 | 26 | scheduler: 27 | type: warmup_linear 28 | params: 29 | num_warmup_steps: 1000 30 | num_training_steps: 11000 31 | 32 | training: 33 | batch_size: 480 34 | lr_scheduler: true 35 | # Don't forget to update schedule_attributes if you update this 36 | max_updates: 11000 37 | find_unused_parameters: true 38 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_sbu/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_vqa2: 3 | annotations: 4 | train: 5 | - vqa2/defaults/annotations/imdb_train2014.npy 6 | return_features_info: true 7 | 8 | optimizer: 9 | type: adam_w 10 | params: 11 | 
lr: 5e-5 12 | eps: 1e-8 13 | 14 | scheduler: 15 | type: warmup_linear 16 | params: 17 | num_warmup_steps: 1000 18 | num_training_steps: 11000 19 | 20 | training: 21 | batch_size: 480 22 | lr_scheduler: true 23 | # Don't forget to update schedule_attributes if you update this 24 | max_updates: 11000 25 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_vqa2/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_vqa2/pretrain_train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/coco_trainval2014.lmdb 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014.npy 15 | - vqa2/defaults/annotations/imdb_val2014.npy 16 | return_features_info: true 17 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/mmimdb/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_mmimdb: 6 | return_features_info: true 7 | 8 | model_config: 9 | visual_bert: 10 | training_head_type: pretraining 11 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/vizwiz/train_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | vizwiz: 3 | return_features_info: true 4 | use_images: false 5 | use_features: true 6 | features: 7 | train: 8 | - vizwiz/v2019/features/detectron.lmdb 9 | - vizwiz/v2019/features/detectron.lmdb 10 | val: 11 | - vizwiz/v2019/features/detectron.lmdb 12 | test: 13 | - vizwiz/v2019/features/detectron.lmdb 14 | annotations: 15 | train: 16 | - datasets/vizwiz/imdbs/imdb_vizwiz_train.npy 17 | - datasets/vizwiz/imdbs/imdb_vizwiz_val.npy 18 | processors: 19 | # Stop fasttext from loading by overriding the context_processor 20 | context_processor: 21 | type: simple_word 22 | params: {} 23 | text_processor: 24 | type: bert_tokenizer 25 | params: 26 | tokenizer_config: 27 | type: bert-base-uncased 28 | params: 29 | do_lower_case: true 30 | mask_probability: 0 31 | max_seq_length: 128 32 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/vqa2/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/coco_trainval2014.lmdb 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014.npy 15 | - vqa2/defaults/annotations/imdb_val2014.npy 16 | return_features_info: true 17 | processors: 18 | text_processor: 19 | type: bert_tokenizer 20 | params: 21 | tokenizer_config: 22 | type: bert-base-uncased 23 | params: 24 | do_lower_case: true 25 | mask_probability: 0 26 | max_seq_length: 128 27 | 
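The `bert_tokenizer` processor configured above (and in several earlier configs) is a standard BERT wordpiece tokenizer: uncased, no token masking (`mask_probability: 0`), sequences truncated and padded to `max_seq_length`. Approximately, in terms of the Hugging Face API it builds on (MMF's own wrapper lives in `mmf/datasets/processors/bert_processors.py`):

```
from transformers import BertTokenizer

# Approximate equivalent of the processor config above: bert-base-uncased,
# do_lower_case=true, max_seq_length=128, mask_probability=0 (no masking).
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
encoded = tokenizer(
    "is there food in the image?",
    max_length=128,
    padding="max_length",
    truncation=True,
)
print(len(encoded["input_ids"]))  # 128: truncated/padded to max_seq_length
```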
-------------------------------------------------------------------------------- /projects/visual_bert/configs/vqa2/with_raw_images.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - ../../../../mmf/configs/datasets/vqa2/with_raw_images.yaml 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.11.0 2 | torchaudio==0.11.0 3 | torchvision==0.12.0 4 | numpy>=1.16.6, <=1.21.4 5 | tqdm>=4.43.0,<4.50.0 6 | torchtext==0.12.0 7 | GitPython==3.1.30 8 | requests==2.23.0 9 | fasttext==0.9.1 10 | nltk==3.6.6 11 | editdistance==0.5.3 12 | transformers>=3.4.0, <=4.10.1 13 | sklearn==0.0 14 | omegaconf>=2.0.6, <=2.1 15 | lmdb==0.98 16 | termcolor==1.1.0 17 | iopath==0.1.8 18 | datasets==1.2.1 19 | matplotlib==3.3.4 20 | pycocotools==2.0.2 21 | ftfy==5.8 22 | pytorch-lightning==1.6.0 23 | psutil 24 | pillow==9.3.0 25 | sentencepiece 26 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.utils.patch import patch_transformers 3 | 4 | 5 | patch_transformers() 6 | -------------------------------------------------------------------------------- /tests/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/common/test_meter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import unittest 3 | 4 | import torch 5 | from mmf.common.meter import Meter 6 | from mmf.common.report import Report 7 | from mmf.common.sample import SampleList 8 | 9 | 10 | class TestMeter(unittest.TestCase): 11 | def test_meter_update_from_report(self): 12 | meter = Meter() 13 | prepared_batch = SampleList( 14 | {"targets": torch.tensor([1, 2, 3, 4]), "dataset_type": "val"} 15 | ) 16 | for idx in range(5): 17 | model_output = { 18 | "scores": torch.tensor([0, 1, 2, 3]), 19 | "losses": {"loss": float(idx)}, 20 | } 21 | report = Report(prepared_batch, model_output) 22 | meter.update_from_report(report) 23 | 24 | self.assertEqual(meter.loss.global_avg, 2.0) 25 | self.assertEqual(meter.loss.avg, 2.0) 26 | -------------------------------------------------------------------------------- /tests/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/data/user_dir/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # flake8: noqa: F401 3 | 4 | from . 
import datasets, models 5 | -------------------------------------------------------------------------------- /tests/data/user_dir/configs/always_one.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | always_one: {} 3 | -------------------------------------------------------------------------------- /tests/data/user_dir/configs/experiment.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | simple: 3 | losses: 4 | - type: cross_entropy 5 | 6 | optimizer: 7 | type: SGD 8 | params: 9 | lr: 1e-3 10 | 11 | evaluation: 12 | metrics: 13 | - accuracy 14 | 15 | training: 16 | batch_size: 8 17 | lr_scheduler: false 18 | max_updates: 50 19 | early_stop: 20 | criteria: always_one/accuracy 21 | minimize: false 22 | log_format: json 23 | -------------------------------------------------------------------------------- /tests/data/user_dir/configs/simple.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | simple: 3 | in_dim: 1 4 | data_item_key: input 5 | -------------------------------------------------------------------------------- /tests/data/user_dir/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # flake8: noqa: F401 3 | 4 | from . import always_one 5 | -------------------------------------------------------------------------------- /tests/data/user_dir/datasets/always_one.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.base_dataset_builder import BaseDatasetBuilder 5 | from tests.test_utils import NumbersDataset 6 | 7 | 8 | DATASET_LEN = 20 9 | 10 | 11 | @registry.register_builder("always_one") 12 | class AlwaysOneBuilder(BaseDatasetBuilder): 13 | def __init__(self): 14 | super().__init__("always_one") 15 | 16 | def build(self, *args, **kwargs): 17 | pass 18 | 19 | @classmethod 20 | def config_path(cls): 21 | return "configs/always_one.yaml" 22 | 23 | def load(self, config, dataset_type="train", *args, **kwargs): 24 | dataset = NumbersDataset(DATASET_LEN, data_item_key="input", always_one=True) 25 | dataset.dataset_name = self.dataset_name 26 | dataset.dataset_type = dataset_type 27 | return dataset 28 | -------------------------------------------------------------------------------- /tests/data/user_dir/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # flake8: noqa: F401 3 | 4 | from . import simple 5 | -------------------------------------------------------------------------------- /tests/data/user_dir/models/simple.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 3 | from mmf.common.registry import registry 4 | from tests.test_utils import SimpleModel 5 | 6 | 7 | @registry.register_model("simple") 8 | class CustomSimpleModel(SimpleModel): 9 | @classmethod 10 | def config_path(cls): 11 | return "configs/simple.yaml" 12 | 13 | def forward(self, sample_list): 14 | return {"scores": self.classifier(sample_list.input)} 15 | -------------------------------------------------------------------------------- /tests/data/vocab.txt: -------------------------------------------------------------------------------- 1 | a 2 | man 3 | with 4 | red 5 | helmet 6 | on 7 | small 8 | moped 9 | dirt 10 | road 11 | riding 12 | motor 13 | bike 14 | the 15 | countryside 16 | back 17 | of 18 | motorcycle 19 | -------------------------------------------------------------------------------- /tests/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/datasets/test_prediction_processors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import unittest 4 | 5 | import torch 6 | from mmf.common.report import Report 7 | from mmf.common.sample import SampleList 8 | from mmf.datasets.processors.prediction_processors import ArgMaxPredictionProcessor 9 | 10 | 11 | class TestDatasetProcessors(unittest.TestCase): 12 | def setUp(self): 13 | torch.manual_seed(1234) 14 | 15 | def test_argmax_prediction_processor(self): 16 | processor = ArgMaxPredictionProcessor(config={}) 17 | batch = SampleList({"id": torch.tensor([1, 2, 3, 4, 5], dtype=torch.long)}) 18 | model_output = {"scores": torch.rand(5, 4)} 19 | report = Report(batch, model_output) 20 | 21 | predictions = processor(report) 22 | 23 | expected_answers = [1, 1, 2, 1, 3] 24 | expected = [] 25 | for idx, answer in enumerate(expected_answers): 26 | expected.append({"id": idx + 1, "answer": answer}) 27 | 28 | self.assertEqual(predictions, expected) 29 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/models/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/models/test_albef.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import unittest 3 | 4 | import torch 5 | from mmf.models.albef.vit import AlbefVitEncoder 6 | from omegaconf import OmegaConf 7 | from tests.test_utils import setup_proxy 8 | from torch import nn 9 | 10 | 11 | class TestAlbefEncoders(unittest.TestCase): 12 | def setUp(self): 13 | setup_proxy() 14 | 15 | def _test_init(self, cls, **params): 16 | encoder = cls.from_params(**params) 17 | self.assertTrue(isinstance(encoder, nn.Module)) 18 | 19 | def test_vision_transformer(self): 20 | config = OmegaConf.structured(AlbefVitEncoder.Config()) 21 | encoder = AlbefVitEncoder(config) 22 | x = torch.rand((1, 3, 224, 224)) 23 | output = encoder(x) 24 | self.assertEqual(output.size(-1), config.out_dim) 25 | -------------------------------------------------------------------------------- /tests/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/modules/test_hf_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import unittest 4 | 5 | from mmf.modules.hf_layers import replace_with_jit, undo_replace_with_jit 6 | 7 | try: 8 | from transformers3.modeling_bert import BertSelfAttention 9 | except ImportError: 10 | from transformers.modeling_bert import BertSelfAttention 11 | 12 | 13 | class TestHFLayers(unittest.TestCase): 14 | def test_undo_replace_with_jit(self): 15 | original_function = BertSelfAttention.forward 16 | replace_with_jit() 17 | undo_replace_with_jit() 18 | self.assertTrue(BertSelfAttention.forward is original_function) 19 | -------------------------------------------------------------------------------- /tests/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/trainers/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/trainers/lightning/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/trainers/test_device.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
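# Verifies TrainerDeviceMixin seed and device configuration: expects "cuda:0" when CUDA is available and the CPU device otherwise.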
2 | import unittest 3 | 4 | import torch 5 | from mmf.trainers.core.device import TrainerDeviceMixin 6 | from mmf.utils.general import get_current_device 7 | from omegaconf import OmegaConf 8 | 9 | 10 | class DeviceMock(TrainerDeviceMixin): 11 | def __init__(self, config): 12 | self.config = config 13 | 14 | 15 | class TestDevice(unittest.TestCase): 16 | def test_current_device(self): 17 | config = { 18 | "training": {"seed": 1, "cudnn_benchmark": False}, 19 | "distributed": {"init_method": None}, 20 | } 21 | deviceMock = DeviceMock(OmegaConf.create(config)) 22 | deviceMock.configure_seed() 23 | deviceMock.configure_device() 24 | device = get_current_device() 25 | if torch.cuda.is_available(): 26 | self.assertEqual(device, "cuda:0") 27 | else: 28 | self.assertEqual(device, torch.device(type="cpu")) 29 | -------------------------------------------------------------------------------- /tests/trainers/test_eval_loop.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import unittest 4 | from unittest.mock import MagicMock, patch 5 | 6 | import torch 7 | from tests.trainers.test_utils import get_config_with_defaults, get_mmf_trainer 8 | 9 | 10 | class TestEvalLoop(unittest.TestCase): 11 | def setUp(self): 12 | torch.manual_seed(2) 13 | 14 | @patch( 15 | "mmf.common.test_reporter.PathManager", 16 | return_value=MagicMock(return_value=None), 17 | ) 18 | @patch("mmf.common.test_reporter.get_mmf_env", return_value="") 19 | def test_eval_loop(self, mock_get_mmf_env, mock_path_manager): 20 | config = get_config_with_defaults( 21 | {"training": {"max_updates": 2, "max_epochs": 2}} 22 | ) 23 | trainer = get_mmf_trainer(config=config) 24 | combined_report, meter = trainer.evaluation_loop("val") 25 | self.assertAlmostEqual(combined_report["losses"]["loss"], 493377.5312) 26 | self.assertAlmostEqual(combined_report["logits"].item(), -0.2379742, 6) 27 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/utils/test_distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import unittest 3 | 4 | import mmf.utils.distributed as distributed 5 | 6 | 7 | class TestUtilsDistributed(unittest.TestCase): 8 | def test_object_byte_tensor_conversion(self): 9 | test_obj = [1, "2", {3: 4}, [5]] 10 | test_obj_bytes = distributed.object_to_byte_tensor(test_obj) 11 | test_obj_dec = distributed.byte_tensor_to_object(test_obj_bytes) 12 | self.assertEqual(test_obj_dec, test_obj) 13 | -------------------------------------------------------------------------------- /tests/utils/test_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
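# Verifies safecopy_modules/restore_saved_modules: a function monkey-patched after being saved to the registry must be restorable to its original implementation.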
2 | 3 | import unittest 4 | 5 | from mmf.common.registry import registry 6 | from mmf.utils.patch import ( 7 | ORIGINAL_PATCH_FUNCTIONS_KEY, 8 | restore_saved_modules, 9 | safecopy_modules, 10 | ) 11 | 12 | 13 | class TestClass: 14 | @staticmethod 15 | def test_function(): 16 | return True 17 | 18 | 19 | class TestUtilsPatch(unittest.TestCase): 20 | def setUp(self): 21 | registry.register(ORIGINAL_PATCH_FUNCTIONS_KEY, {}) 22 | 23 | def test_safecopy_modules(self): 24 | safecopy_modules(["TestClass.test_function"], {"TestClass": TestClass}) 25 | original_functions = registry.get(ORIGINAL_PATCH_FUNCTIONS_KEY) 26 | self.assertTrue("TestClass.test_function" in original_functions) 27 | 28 | TestClass.test_function = lambda: False 29 | restore_saved_modules({"TestClass": TestClass}) 30 | self.assertTrue(TestClass.test_function()) 31 | -------------------------------------------------------------------------------- /tests/utils/test_timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import time 3 | import unittest 4 | 5 | from mmf.utils.timer import Timer 6 | 7 | 8 | class TestUtilsTimer(unittest.TestCase): 9 | def test_get_current(self): 10 | timer = Timer() 11 | expected = 0 12 | 13 | self.assertEqual(int(timer.get_current().split("ms")[0]), expected) 14 | 15 | def test_reset(self): 16 | timer = Timer() 17 | time.sleep(2) 18 | timer.reset() 19 | expected = 0 20 | 21 | self.assertEqual(int(timer.get_current().split("ms")[0]), expected) 22 | 23 | def test_get_time_since_start(self): 24 | timer = Timer() 25 | time.sleep(2) 26 | expected = 2 27 | 28 | self.assertEqual(expected, int(timer.get_time_since_start().split("s")[0])) 29 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tools/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tools/scripts/bert/extract_bert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: extract_bert.sh <imdb_path> <out_path> <n_groups> 3 | # Spawns one background extraction process per group id in [0, n_groups - 1]. 4 | N_REM=`expr $3 - 1` 5 | 6 | for i in $(seq 0 $N_REM); do 7 | python tools/scripts/bert/extract_bert_embeddings.py --imdb_path $1 --out_path $2 --group_id $i --n_groups $3 & 8 | done 9 | -------------------------------------------------------------------------------- /tools/scripts/gqa/README.md: -------------------------------------------------------------------------------- 1 | # Conversion of GQA to VQA format 2 | 3 | * Download the GQA datasets and store them in the layout expected by the conversion script 4 | * Download the 300D GloVe embeddings file 5 | * Run the script from the root of the repo, substituting the relevant paths: 6 | 7 | ``` 8 | python tools/scripts/gqa/convert_gqa_to_vqa.py --gqa_dir <gqa_dir> --out_dir <out_dir> 9 | ``` 10 | -------------------------------------------------------------------------------- /tools/scripts/visual_dialog/extract_vocabulary.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
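# Builds a Visual Dialog vocabulary by reusing the GQA ExtractVocabulary base class: collects questions, answers, and dialog captions from the visdial JSON files.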
2 | import json 3 | 4 | from tools.scripts.gqa.extract_vocabulary import ExtractVocabulary 5 | 6 | 7 | class ExtractVisdialVocabulary(ExtractVocabulary): 8 | def __init__(self): 9 | super().__init__() 10 | 11 | def get_text(self): 12 | text = [] 13 | 14 | for input_file in self.input_files: 15 | with open(input_file) as f: 16 | f_json = json.load(f) 17 | # Add 'questions' from visdial 18 | text += f_json["data"]["questions"] 19 | # Add 'answers' from visdial 20 | text += f_json["data"]["answers"] 21 | 22 | for dialog in f_json["data"]["dialogs"]: 23 | text += [dialog["caption"]] 24 | return text 25 | 26 | 27 | if __name__ == "__main__": 28 | extractor = ExtractVisdialVocabulary() 29 | extractor.extract() 30 | -------------------------------------------------------------------------------- /tools/sweeps/README.md: -------------------------------------------------------------------------------- 1 | # Sweep Scripts 2 | 3 | See [https://mmf.sh/docs/tutorials/slurm](https://mmf.sh/docs/tutorials/slurm) for a tutorial on how to use these scripts. 4 | -------------------------------------------------------------------------------- /website/.eslintignore: -------------------------------------------------------------------------------- 1 | .docusaurus 2 | static/api 3 | build/ 4 | -------------------------------------------------------------------------------- /website/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | 22 | # ESLint 23 | .eslintcache 24 | 25 | # Static Docs 26 | static/api 27 | -------------------------------------------------------------------------------- /website/.prettierignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build 3 | .docusaurus 4 | static/api 5 | -------------------------------------------------------------------------------- /website/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "arrowParens": "always", 3 | "bracketSpacing": false, 4 | "jsxBracketSameLine": true, 5 | "printWidth": 80, 6 | "proseWrap": "never", 7 | "singleQuote": true, 8 | "trailingComma": "all" 9 | } 10 | -------------------------------------------------------------------------------- /website/.stylelintrc.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | module.exports = { 9 | plugins: ['stylelint-copyright'], 10 | rules: { 11 | 'docusaurus/copyright-header': true, 12 | }, 13 | }; 14 | -------------------------------------------------------------------------------- /website/docs/getting_started/faqs.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: faqs 3 | title: Frequently Asked Questions (FAQ) 4 | sidebar_label: FAQs 5 | --- 6 | ## Coming Soon! 
7 | -------------------------------------------------------------------------------- /website/docs/getting_started/video_overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: video_overview 3 | title: Video overview 4 | sidebar_label: Video overview 5 | --- 6 | 7 | <!-- embedded video iframe: markup stripped from this dump -->
10 | -------------------------------------------------------------------------------- /website/src/pages/api_redirect/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | * 7 | * @format 8 | */ 9 | import React from 'react'; 10 | import BrowserOnly from '@docusaurus/BrowserOnly'; 11 | import {useHistory} from 'react-router-dom'; 12 | 13 | const API = () => { 14 | const history = useHistory(); 15 | history.push('/'); 16 | return ( 17 | <BrowserOnly fallback={<div>Some Fallback Content</div>}> 18 | {() => { 19 | window.location.href = '/api'; 20 | }} 21 | </BrowserOnly>
22 | ); 23 | }; 24 | 25 | export default API; 26 | -------------------------------------------------------------------------------- /website/static/.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # This config file will prevent tests from being run on the gh-pages branch. 2 | version: 2 3 | jobs: 4 | build: 5 | machine: true 6 | branches: 7 | ignore: gh-pages 8 | steps: 9 | - run: echo "Skipping tests on gh-pages branch" 10 | -------------------------------------------------------------------------------- /website/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/.nojekyll -------------------------------------------------------------------------------- /website/static/CNAME: -------------------------------------------------------------------------------- 1 | mmf.sh 2 | -------------------------------------------------------------------------------- /website/static/img/boilerplate.svg: -------------------------------------------------------------------------------- <!-- SVG markup stripped from this dump --> -------------------------------------------------------------------------------- /website/static/img/boilerplate_white.svg: -------------------------------------------------------------------------------- <!-- SVG markup stripped from this dump --> -------------------------------------------------------------------------------- /website/static/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/favicon.png -------------------------------------------------------------------------------- /website/static/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/logo.png -------------------------------------------------------------------------------- /website/static/img/logo_white_f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/logo_white_f.png -------------------------------------------------------------------------------- /website/static/img/oss_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/oss_logo.png -------------------------------------------------------------------------------- /website/static/img/pytorch_logo.svg: -------------------------------------------------------------------------------- <!-- SVG markup stripped from this dump; only the title "pytorch_logo" survives --> -------------------------------------------------------------------------------- /website/static/img/pytorch_logo_white.svg: -------------------------------------------------------------------------------- <!-- SVG markup stripped from this dump; only the title "pytorch_logo" survives --> --------------------------------------------------------------------------------