├── .circleci └── config.yml ├── .editorconfig ├── .flake8 ├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── config.yml │ ├── feature-request.md │ ├── questions-help-support.md │ └── unexpected-problems.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── cpu_test.yaml │ ├── deploy_website.yaml │ └── linter_test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── NOTICES ├── PACKAGE ├── README.md ├── docs ├── .gitignore ├── Makefile ├── license_header.txt ├── requirements.txt └── source │ ├── _static │ ├── css │ │ └── customize.css │ ├── images │ │ ├── chevron-right-orange.svg │ │ ├── chevron_blue.svg │ │ ├── favicon.png │ │ ├── logo_icon.svg │ │ ├── mmf_logo.png │ │ ├── mmf_logo.svg │ │ ├── mmf_logo_white_f.svg │ │ ├── search_icon.svg │ │ └── view-page-source-icon.svg │ └── js │ │ ├── ga.js │ │ └── redirect.js │ ├── _templates │ ├── layout.html │ └── theme_variables.jinja │ ├── conf.py │ ├── index.rst │ └── lib │ ├── common │ ├── registry.rst │ └── sample.rst │ ├── datasets │ ├── base_dataset.rst │ ├── base_dataset_builder.rst │ └── processors.rst │ ├── models │ └── base_model.rst │ ├── modules │ ├── losses.rst │ └── metrics.rst │ └── utils │ └── text.rst ├── mmf ├── __init__.py ├── common │ ├── __init__.py │ ├── batch_collator.py │ ├── constants.py │ ├── dataset_loader.py │ ├── meter.py │ ├── registry.py │ ├── report.py │ ├── sample.py │ ├── test_reporter.py │ └── typings.py ├── configs │ ├── datasets │ │ ├── airstore │ │ │ └── defaults.yaml │ │ ├── charades │ │ │ └── defaults.yaml │ │ ├── clevr │ │ │ └── defaults.yaml │ │ ├── coco │ │ │ ├── defaults.yaml │ │ │ ├── detection.yaml │ │ │ ├── masked.yaml │ │ │ └── ocr_en.yaml │ │ ├── coco2017 │ │ │ └── masked.yaml │ │ ├── conceptual_captions │ │ │ ├── defaults.yaml │ │ │ ├── masked.yaml │ │ │ └── train_small.yaml │ │ ├── flickr30k │ │ │ └── masked.yaml │ │ ├── glue │ │ │ └── defaults.yaml │ │ ├── gqa │ │ │ ├── defaults.yaml │ │ │ └── masked.yaml │ │ ├── hateful_memes │ │ │ ├── bert.yaml │ │ │ ├── defaults.yaml │ │ │ ├── fine_grained │ │ │ │ ├── attack_vectors.yaml │ │ │ │ ├── defaults.yaml │ │ │ │ ├── hateful_pc_attack.yaml │ │ │ │ ├── pc_attack.yaml │ │ │ │ ├── protected_groups.yaml │ │ │ │ └── with_features.yaml │ │ │ └── with_features.yaml │ │ ├── localized_narratives │ │ │ └── masked.yaml │ │ ├── mmimdb │ │ │ ├── defaults.yaml │ │ │ ├── masked.yaml │ │ │ └── with_features.yaml │ │ ├── nlvr2 │ │ │ └── defaults.yaml │ │ ├── ocrvqa │ │ │ └── defaults.yaml │ │ ├── okvqa │ │ │ └── defaults.yaml │ │ ├── retrieval │ │ │ └── flickr30k_defaults.yaml │ │ ├── sbu_captions │ │ │ └── masked.yaml │ │ ├── stvqa │ │ │ └── defaults.yaml │ │ ├── textcaps │ │ │ └── defaults.yaml │ │ ├── textvqa │ │ │ ├── defaults.yaml │ │ │ └── with_resnet.yaml │ │ ├── vinvl │ │ │ └── defaults.yaml │ │ ├── visual_dialog │ │ │ └── defaults.yaml │ │ ├── visual_entailment │ │ │ └── defaults.yaml │ │ ├── visual_genome │ │ │ ├── defaults.yaml │ │ │ ├── detection.yaml │ │ │ └── masked.yaml │ │ ├── vizwiz │ │ │ └── defaults.yaml │ │ ├── vqa2 │ │ │ ├── defaults.yaml │ │ │ ├── masked.yaml │ │ │ ├── masked_q.yaml │ │ │ ├── train_val.yaml │ │ │ └── with_raw_images.yaml │ │ └── vqacp_v2 │ │ │ └── defaults.yaml │ ├── defaults.yaml │ ├── models │ │ ├── alignment │ │ │ └── defaults.yaml │ │ ├── ban │ │ │ └── defaults.yaml │ │ ├── butd │ │ │ └── defaults.yaml │ │ ├── cnn_lstm │ │ │ └── defaults.yaml │ │ ├── fusions │ │ │ ├── concat_bert.yaml │ │ │ ├── concat_bow.yaml │ │ │ ├── defaults.yaml │ │ │ └── 
late_fusion.yaml │ │ ├── krisp │ │ │ └── defaults.yaml │ │ ├── lorra │ │ │ └── defaults.yaml │ │ ├── lxmert │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ │ ├── m4c │ │ │ └── defaults.yaml │ │ ├── m4c_captioner │ │ │ └── defaults.yaml │ │ ├── mmbt │ │ │ ├── classification.yaml │ │ │ ├── defaults.yaml │ │ │ ├── pretrain.yaml │ │ │ └── with_features.yaml │ │ ├── mmf_bert │ │ │ └── defaults.yaml │ │ ├── mmf_transformer │ │ │ ├── defaults.yaml │ │ │ ├── pretrain.yaml │ │ │ └── with_audio_video.yaml │ │ ├── movie_mcan │ │ │ └── defaults.yaml │ │ ├── pythia │ │ │ └── defaults.yaml │ │ ├── unimodal │ │ │ ├── bert.yaml │ │ │ ├── image.yaml │ │ │ ├── text.yaml │ │ │ └── with_features.yaml │ │ ├── unit │ │ │ └── defaults.yaml │ │ ├── uniter │ │ │ └── defaults.yaml │ │ ├── vilbert │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ │ ├── vilt │ │ │ └── defaults.yaml │ │ ├── vinvl │ │ │ └── defaults.yaml │ │ └── visual_bert │ │ │ ├── classification.yaml │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ └── zoo │ │ ├── datasets.yaml │ │ └── models.yaml ├── datasets │ ├── __init__.py │ ├── base_dataset.py │ ├── base_dataset_builder.py │ ├── builders │ │ ├── __init__.py │ │ ├── airstore │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── charades │ │ │ ├── __init__.py │ │ │ ├── _utils.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── clevr │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── coco │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── detection_builder.py │ │ │ ├── detection_dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── coco2017 │ │ │ ├── __init__.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── conceptual_captions │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── flickr30k │ │ │ ├── __init__.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── glue │ │ │ ├── __init__.py │ │ │ └── builder.py │ │ ├── gqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── hateful_memes │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── localized_narratives │ │ │ ├── __init__.py │ │ │ ├── database.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── mmimdb │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── nlvr2 │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── ocrvqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── okvqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── database.py │ │ │ └── dataset.py │ │ ├── retrieval │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ └── datasets.py │ │ ├── sbu_captions │ │ │ ├── __init__.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── stvqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── textcaps │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── textvqa │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── vinvl │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── visual_dialog │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── database.py │ │ │ └── dataset.py │ │ ├── visual_entailment │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── visual_genome │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ 
├── dataset.py │ │ │ ├── detection_builder.py │ │ │ ├── detection_dataset.py │ │ │ ├── masked_builder.py │ │ │ └── masked_dataset.py │ │ ├── vizwiz │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ ├── vqa2 │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── masked_builder.py │ │ │ ├── masked_dataset.py │ │ │ ├── masked_q_vqa2_builder.py │ │ │ ├── masked_q_vqa2_dataset.py │ │ │ ├── ocr_builder.py │ │ │ └── ocr_dataset.py │ │ └── vqacp_v2 │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── database.py │ │ │ └── dataset.py │ ├── concat_dataset.py │ ├── databases │ │ ├── __init__.py │ │ ├── annotation_database.py │ │ ├── features_database.py │ │ ├── image_database.py │ │ ├── readers │ │ │ ├── __init__.py │ │ │ └── feature_readers.py │ │ └── scene_graph_database.py │ ├── iteration_strategies.py │ ├── lightning_multi_datamodule.py │ ├── lightning_multi_dataset_loader.py │ ├── mmf_dataset.py │ ├── mmf_dataset_builder.py │ ├── multi_datamodule.py │ ├── multi_dataset_loader.py │ ├── processors │ │ ├── __init__.py │ │ ├── bert_processors.py │ │ ├── detection_transforms.py │ │ ├── frcnn_processor.py │ │ ├── functional.py │ │ ├── image_processors.py │ │ ├── prediction_processors.py │ │ ├── processors.py │ │ └── video_processors.py │ └── subset_dataset.py ├── models │ ├── __init__.py │ ├── albef │ │ ├── __init__.py │ │ └── vit.py │ ├── alignment.py │ ├── ban.py │ ├── base_model.py │ ├── butd.py │ ├── cnn_lstm.py │ ├── frcnn.py │ ├── fusions.py │ ├── interfaces │ │ ├── __init__.py │ │ └── mmbt.py │ ├── krisp.py │ ├── lorra.py │ ├── lxmert.py │ ├── m4c.py │ ├── m4c_captioner.py │ ├── mmbt.py │ ├── mmf_bert.py │ ├── mmf_transformer.py │ ├── movie_mcan.py │ ├── pythia.py │ ├── top_down_bottom_up.py │ ├── transformers │ │ ├── __init__.py │ │ ├── backends │ │ │ ├── __init__.py │ │ │ └── huggingface.py │ │ ├── base.py │ │ └── heads │ │ │ ├── __init__.py │ │ │ ├── contrastive.py │ │ │ ├── itm.py │ │ │ ├── mlm.py │ │ │ ├── mlp.py │ │ │ ├── mrc.py │ │ │ ├── mrfr.py │ │ │ ├── refiner.py │ │ │ ├── refnet_classifier.py │ │ │ ├── utils.py │ │ │ └── wra.py │ ├── unimodal.py │ ├── unit │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── matcher.py │ │ ├── misc.py │ │ ├── transformer.py │ │ ├── unit.py │ │ └── unit_base_model.py │ ├── uniter.py │ ├── vilbert.py │ ├── vilt.py │ ├── vinvl.py │ ├── visdial_multi_modal.py │ └── visual_bert.py ├── modules │ ├── __init__.py │ ├── attention.py │ ├── bottleneck.py │ ├── decoders.py │ ├── embeddings.py │ ├── encoders.py │ ├── fusions.py │ ├── hf_layers.py │ ├── layers.py │ ├── losses.py │ ├── metrics.py │ ├── optimizers.py │ ├── ot.py │ ├── poolers.py │ ├── schedulers.py │ └── vit.py ├── projects ├── trainers │ ├── __init__.py │ ├── base_trainer.py │ ├── callbacks │ │ ├── __init__.py │ │ ├── base.py │ │ ├── checkpoint.py │ │ ├── early_stopping.py │ │ ├── logistics.py │ │ └── lr_scheduler.py │ ├── core │ │ ├── __init__.py │ │ ├── callback_hook.py │ │ ├── device.py │ │ ├── evaluation_loop.py │ │ ├── profiling.py │ │ └── training_loop.py │ ├── lightning_core │ │ ├── __init__.py │ │ ├── loop_callback.py │ │ ├── loop_callback_with_torchmetrics.py │ │ └── torchmetric.py │ ├── lightning_trainer.py │ └── mmf_trainer.py ├── utils │ ├── __init__.py │ ├── box_ops.py │ ├── build.py │ ├── checkpoint.py │ ├── checkpoint_updater.py │ ├── configuration.py │ ├── dataset.py │ ├── distributed.py │ ├── download.py │ ├── early_stopping.py │ ├── env.py │ ├── features │ │ ├── __init__.py │ │ └── visualizing_image.py │ ├── file_io.py │ ├── flags.py │ ├── 
general.py │ ├── inference.py │ ├── logger.py │ ├── m4c_evaluators.py │ ├── modeling.py │ ├── patch.py │ ├── phoc │ │ ├── __init__.py │ │ ├── build_phoc.py │ │ └── src │ │ │ └── cphoc.c │ ├── process_answers.py │ ├── text.py │ ├── timer.py │ ├── torchscript.py │ ├── transform.py │ ├── visualize.py │ ├── vocab.py │ └── xla.py └── version.py ├── mmf_cli ├── __init__.py ├── hm_convert.py ├── interactive.py ├── predict.py ├── run.py └── torchx_entryscript.py ├── projects ├── ban │ ├── README.md │ └── configs │ │ ├── textvqa │ │ └── defaults.yaml │ │ ├── vizwiz │ │ └── defaults.yaml │ │ └── vqa2 │ │ └── defaults.yaml ├── butd │ ├── README.md │ └── configs │ │ ├── coco │ │ ├── beam_search.yaml │ │ ├── defaults.yaml │ │ └── nucleus_sampling.yaml │ │ ├── conceptual_captions │ │ ├── beam_search.yaml │ │ ├── defaults.yaml │ │ └── nucleus_sampling.yaml │ │ └── textcaps │ │ ├── beam_search.yaml │ │ ├── defaults.yaml │ │ └── eval_pretrained_coco_model.yaml ├── hateful_memes │ ├── README.md │ ├── configs │ │ ├── concat_bert │ │ │ └── defaults.yaml │ │ ├── concat_bow │ │ │ └── defaults.yaml │ │ ├── late_fusion │ │ │ └── defaults.yaml │ │ ├── mmbt │ │ │ ├── defaults.yaml │ │ │ └── with_features.yaml │ │ ├── mmf_transformer │ │ │ └── defaults.yaml │ │ ├── unimodal │ │ │ ├── bert.yaml │ │ │ ├── image.yaml │ │ │ ├── text.yaml │ │ │ └── with_features.yaml │ │ ├── vilbert │ │ │ ├── defaults.yaml │ │ │ ├── direct.yaml │ │ │ └── from_cc.yaml │ │ └── visual_bert │ │ │ ├── defaults.yaml │ │ │ ├── direct.yaml │ │ │ └── from_coco.yaml │ └── fine_grained │ │ ├── README.md │ │ └── configs │ │ └── visual_bert │ │ ├── attack_vectors.yaml │ │ ├── defaults.yaml │ │ ├── hateful_pc_attack.yaml │ │ ├── multilabel.yaml │ │ ├── pc_attack.yaml │ │ └── protected_groups.yaml ├── krisp │ ├── README.md │ ├── configs │ │ ├── krisp │ │ │ ├── okvqa │ │ │ │ ├── conceptnet_only.yaml │ │ │ │ ├── dbpedia_only.yaml │ │ │ │ ├── defaults.yaml │ │ │ │ ├── haspart_only.yaml │ │ │ │ ├── okvqav10.yaml │ │ │ │ ├── okvqav10_fromfullpretrain.yaml │ │ │ │ ├── randomgraph.yaml │ │ │ │ ├── train_val.yaml │ │ │ │ ├── train_val_cnonly.yaml │ │ │ │ ├── train_val_dbonly.yaml │ │ │ │ ├── train_val_hponly.yaml │ │ │ │ ├── train_val_okvqav10.yaml │ │ │ │ ├── train_val_okvqav10_fromfullpretrain.yaml │ │ │ │ ├── train_val_random.yaml │ │ │ │ ├── train_val_vgonly.yaml │ │ │ │ └── visualgenome_only.yaml │ │ │ └── vqa2 │ │ │ │ └── krisp_pretrain.yaml │ │ └── visual_bert │ │ │ ├── masked_coco │ │ │ └── okvqa_safe.yaml │ │ │ ├── masked_vqa2 │ │ │ └── okvqa_safe.yaml │ │ │ ├── okvqa │ │ │ ├── defaults.yaml │ │ │ ├── defaults_v10.yaml │ │ │ ├── train_val.yaml │ │ │ └── train_val_okvqav10.yaml │ │ │ └── vqa2 │ │ │ └── defaults_okvqasafe.yaml │ ├── graphnetwork_module.py │ └── requirements.txt ├── lorra │ ├── README.md │ └── configs │ │ ├── textvqa │ │ └── defaults.yaml │ │ ├── vizwiz │ │ └── defaults.yaml │ │ └── vqa2 │ │ ├── defaults.yaml │ │ ├── train_val.yaml │ │ └── train_val_resnet_only.yaml ├── lxmert │ ├── README.md │ └── configs │ │ ├── coco │ │ ├── masked.yaml │ │ └── pretrain.yaml │ │ ├── defaults.yaml │ │ ├── gqa │ │ ├── masked.yaml │ │ └── pretrain.yaml │ │ ├── pretrain.yaml │ │ ├── visual_genome │ │ ├── masked.yaml │ │ └── pretrain.yaml │ │ └── vqa2 │ │ ├── defaults.yaml │ │ ├── masked.yaml │ │ └── pretrain.yaml ├── m4c │ ├── README.md │ ├── configs │ │ ├── ocrvqa │ │ │ └── defaults.yaml │ │ ├── stvqa │ │ │ └── defaults.yaml │ │ └── textvqa │ │ │ ├── defaults.yaml │ │ │ ├── joint_with_stvqa.yaml │ │ │ └── ocr_ml.yaml │ └── scripts │ │ ├── __init__.py │ │ 
└── extract_ocr_frcn_feature.py ├── m4c_captioner │ ├── README.md │ ├── configs │ │ ├── butd │ │ │ └── textcaps │ │ └── m4c_captioner │ │ │ ├── coco │ │ │ ├── defaults.yaml │ │ │ └── eval_on_textcaps.yaml │ │ │ └── textcaps │ │ │ ├── defaults.yaml │ │ │ ├── joint_with_coco.yaml │ │ │ ├── with_caffe2_feat.yaml │ │ │ └── without_ocr.yaml │ └── scripts │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ └── textcaps_eval.py ├── mmbt │ ├── README.md │ └── configs │ │ ├── hateful_memes │ │ ├── defaults.yaml │ │ ├── hateful_with_refiner.yaml │ │ └── with_features.yaml │ │ ├── masked_coco │ │ └── defaults.yaml │ │ ├── mmimdb │ │ ├── defaults.yaml │ │ ├── paper_ablations_reducedlabel.yaml │ │ └── with_features.yaml │ │ ├── okvqa │ │ └── with_images.yaml │ │ └── vqa2 │ │ └── with_raw_images.yaml ├── mmf_transformer │ ├── configs │ │ ├── airstore │ │ │ └── masked_coco.yaml │ │ ├── charades │ │ │ └── direct.yaml │ │ ├── hateful_memes │ │ │ ├── defaults.yaml │ │ │ └── hateful_with_refiner.yaml │ │ ├── masked_coco │ │ │ ├── defaults.yaml │ │ │ └── pretrain_itm.yaml │ │ ├── okvqa │ │ │ └── defaults.yaml │ │ └── vqa2 │ │ │ └── defaults.yaml │ └── localized_narratives │ │ └── masked.yaml ├── movie_mcan │ ├── README.md │ └── configs │ │ └── vqa2 │ │ ├── defaults.yaml │ │ └── e2e.yaml ├── others │ ├── cnn_lstm │ │ ├── clevr │ │ │ └── defaults.yaml │ │ └── hateful_memes │ │ │ └── defaults.yaml │ ├── concat_bert │ │ └── hateful_memes │ │ │ └── defaults.yaml │ ├── concat_bow │ │ └── hateful_memes │ │ │ └── defaults.yaml │ ├── late_fusion │ │ └── hateful_memes │ │ │ └── defaults.yaml │ ├── mmf_bert │ │ └── configs │ │ │ ├── masked_coco │ │ │ ├── defaults.yaml │ │ │ ├── pretrain.yaml │ │ │ └── pretrain_joint_vqa2.yaml │ │ │ ├── masked_conceptual_captions │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ │ │ ├── masked_vqa2 │ │ │ ├── defaults.yaml │ │ │ └── pretrain.yaml │ │ │ ├── visual_entailment │ │ │ └── defaults.yaml │ │ │ ├── vizwiz │ │ │ └── defaults.yaml │ │ │ └── vqa2 │ │ │ └── defaults.yaml │ └── unimodal │ │ └── configs │ │ └── hateful_memes │ │ ├── bert.yaml │ │ ├── image.yaml │ │ ├── text.yaml │ │ └── with_features.yaml ├── pretrain_vl_right │ ├── README.md │ └── configs │ │ ├── vilbert │ │ ├── masked_coco │ │ │ ├── defaults.yaml │ │ │ ├── fifty_pc.yaml │ │ │ ├── full.yaml │ │ │ └── ten_pc.yaml │ │ ├── masked_conceptual_captions │ │ │ ├── defaults.yaml │ │ │ ├── full.yaml │ │ │ ├── full_coco_generated.yaml │ │ │ ├── half.yaml │ │ │ ├── half_coco_generated.yaml │ │ │ ├── small.yaml │ │ │ ├── small_coco_generated.yaml │ │ │ ├── small_fifty_pc.yaml │ │ │ └── small_ten_pc.yaml │ │ └── masked_vqa2 │ │ │ ├── defaults.yaml │ │ │ ├── fifty_pc.yaml │ │ │ ├── full.yaml │ │ │ └── ten_pc.yaml │ │ └── visual_bert │ │ ├── masked_coco │ │ ├── defaults.yaml │ │ ├── fifty_pc.yaml │ │ ├── full.yaml │ │ ├── full_train_val.yaml │ │ └── ten_pc.yaml │ │ ├── masked_conceptual_captions │ │ ├── defaults.yaml │ │ ├── full.yaml │ │ ├── full_coco_generated.yaml │ │ ├── half.yaml │ │ ├── half_coco_generated.yaml │ │ ├── small.yaml │ │ ├── small_coco_generated.yaml │ │ ├── small_fifty_pc.yaml │ │ └── small_ten_pc.yaml │ │ └── masked_vqa2 │ │ ├── defaults.yaml │ │ ├── fifty_pc.yaml │ │ ├── full.yaml │ │ ├── full_train_val.yaml │ │ └── ten_pc.yaml ├── pythia │ ├── README.md │ └── configs │ │ ├── masked_q_vqa2 │ │ └── defaults.yaml │ │ ├── multihead │ │ └── defaults.yaml │ │ ├── textvqa │ │ └── defaults.yaml │ │ ├── visual_genome │ │ └── defaults.yaml │ │ ├── vizwiz │ │ └── defaults.yaml │ │ └── vqa2 │ │ ├── 
12k_iterations_without_resnet.yaml │ │ ├── debug.yaml │ │ ├── defaults.yaml │ │ ├── resnet_only.yaml │ │ ├── train_val.yaml │ │ └── train_val_resnet_only.yaml ├── unit │ ├── README.md │ └── configs │ │ ├── all_8_datasets │ │ ├── separate_dec.yaml │ │ ├── shared_dec.yaml │ │ └── shared_dec_without_task_embedding.yaml │ │ ├── coco │ │ ├── single_task.yaml │ │ └── single_task_without_task_embedding.yaml │ │ ├── coco_vg_vqa2 │ │ ├── separate_dec.yaml │ │ └── shared_dec.yaml │ │ ├── coco_vqa2 │ │ ├── separate_dec.yaml │ │ └── shared_dec.yaml │ │ ├── vg │ │ └── single_task.yaml │ │ ├── vg_vqa2 │ │ ├── separate_dec.yaml │ │ └── shared_dec.yaml │ │ ├── visual_entailment_dataset_cfg.yaml │ │ ├── vqa2 │ │ └── single_task.yaml │ │ └── vqa2_dataset_cfg.yaml ├── uniter │ ├── README.md │ └── configs │ │ ├── masked_coco │ │ └── defaults.yaml │ │ └── vqa2 │ │ └── defaults.yaml ├── vilbert │ ├── README.md │ └── configs │ │ ├── hateful_memes │ │ ├── defaults.yaml │ │ ├── direct.yaml │ │ └── from_cc.yaml │ │ ├── masked_coco │ │ ├── defaults.yaml │ │ ├── pretrain.yaml │ │ └── pretrain_train_val.yaml │ │ ├── masked_conceptual_captions │ │ ├── defaults.yaml │ │ └── pretrain.yaml │ │ ├── masked_vqa2 │ │ ├── defaults.yaml │ │ ├── pretrain.yaml │ │ └── pretrain_train_val.yaml │ │ ├── mmimdb │ │ ├── defaults.yaml │ │ └── pretrain.yaml │ │ ├── nlvr2 │ │ └── defaults.yaml │ │ ├── visual_entailment │ │ └── defaults.yaml │ │ ├── vizwiz │ │ └── defaults.yaml │ │ └── vqa2 │ │ ├── defaults.yaml │ │ └── train_val.yaml ├── vilt │ ├── README.md │ └── configs │ │ ├── masked_coco │ │ ├── defaults.yaml │ │ └── pretrain.yaml │ │ └── vqa2 │ │ ├── defaults.yaml │ │ ├── vit_b16_224.yaml │ │ └── vit_b32_384.yaml ├── vinvl │ ├── README.md │ └── configs │ │ └── vqa2 │ │ └── defaults.yaml └── visual_bert │ ├── README.md │ └── configs │ ├── gqa │ └── defaults.yaml │ ├── hateful_memes │ ├── defaults.yaml │ ├── direct.yaml │ └── from_coco.yaml │ ├── localized_narratives │ ├── defaults.yaml │ └── pretrain.yaml │ ├── masked_coco │ ├── defaults.yaml │ ├── pretrain.yaml │ └── pretrain_train_val.yaml │ ├── masked_conceptual_captions │ ├── defaults.yaml │ └── pretrain.yaml │ ├── masked_gqa │ └── defaults.yaml │ ├── masked_sbu │ ├── defaults.yaml │ └── pretrain.yaml │ ├── masked_vqa2 │ ├── defaults.yaml │ ├── pretrain.yaml │ └── pretrain_train_val.yaml │ ├── mmimdb │ ├── defaults.yaml │ └── pretrain.yaml │ ├── nlvr2 │ └── defaults.yaml │ ├── visual_entailment │ ├── defaults.yaml │ └── train_val.yaml │ ├── vizwiz │ ├── defaults.yaml │ └── train_val.yaml │ └── vqa2 │ ├── defaults.yaml │ ├── train_val.yaml │ └── with_raw_images.yaml ├── pyproject.toml ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── common │ ├── __init__.py │ ├── test_batch_collator.py │ ├── test_meter.py │ ├── test_report.py │ └── test_sample.py ├── configs │ ├── __init__.py │ ├── test_configs_for_keys.py │ └── test_zoo_urls.py ├── conftest.py ├── data │ ├── user_dir │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── always_one.yaml │ │ │ ├── experiment.yaml │ │ │ └── simple.yaml │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── always_one.py │ │ └── models │ │ │ ├── __init__.py │ │ │ └── simple.py │ └── vocab.txt ├── datasets │ ├── __init__.py │ ├── test_base_dataset.py │ ├── test_bert_processors.py │ ├── test_iteration_strategies.py │ ├── test_mmf_dataset_builder.py │ ├── test_multi_datamodule.py │ ├── test_multi_dataset_loader.py │ ├── test_prediction_processors.py │ └── test_processors.py ├── models │ ├── __init__.py │ ├── interfaces │ │ ├── __init__.py │ │ └── 
test_interfaces.py │ ├── test_albef.py │ ├── test_cnn_lstm.py │ ├── test_mmbt.py │ ├── test_mmf_transformer.py │ ├── test_uniter.py │ ├── test_vilbert.py │ ├── test_vilt.py │ ├── test_vinvl.py │ ├── test_visual_bert.py │ └── transformers │ │ ├── __init__.py │ │ ├── test_heads.py │ │ └── test_heads_dict.py ├── modules │ ├── __init__.py │ ├── test_encoders.py │ ├── test_fusions.py │ ├── test_hf_layers.py │ ├── test_layers.py │ ├── test_losses.py │ ├── test_metrics.py │ ├── test_optimizers.py │ ├── test_poolers.py │ └── test_vit.py ├── test_utils.py ├── trainers │ ├── __init__.py │ ├── callbacks │ │ ├── __init__.py │ │ ├── test_logistics.py │ │ ├── test_lr_scheduler.py │ │ └── test_user_callback.py │ ├── lightning │ │ ├── __init__.py │ │ ├── lightning_trainer_mock.py │ │ ├── test_checkpoint.py │ │ ├── test_grad_accumulate.py │ │ ├── test_grad_clipping.py │ │ ├── test_logging.py │ │ ├── test_loop_conditions.py │ │ ├── test_loss.py │ │ ├── test_lr_schedule.py │ │ └── test_validation.py │ ├── test_device.py │ ├── test_eval_loop.py │ ├── test_fp16.py │ ├── test_sharded_ddp.py │ ├── test_trainer_mocks.py │ ├── test_training_loop.py │ └── test_utils.py └── utils │ ├── __init__.py │ ├── test_checkpoint.py │ ├── test_configuration.py │ ├── test_distributed.py │ ├── test_download.py │ ├── test_env.py │ ├── test_file_io.py │ ├── test_general.py │ ├── test_logger.py │ ├── test_model.py │ ├── test_patch.py │ ├── test_quality_checks.py │ ├── test_text.py │ ├── test_timer.py │ └── test_visualize.py ├── tools ├── __init__.py ├── scripts │ ├── __init__.py │ ├── bert │ │ ├── extract_bert.sh │ │ └── extract_bert_embeddings.py │ ├── coco │ │ └── coco_caption_eval.py │ ├── features │ │ ├── extract_features_vinvl.py │ │ ├── extract_features_vmb.py │ │ ├── extract_resnet152_feat.py │ │ ├── extraction_utils.py │ │ ├── frcnn │ │ │ ├── extract_features_frcnn.py │ │ │ ├── frcnn_utils.py │ │ │ ├── modeling_frcnn.py │ │ │ └── processing_image.py │ │ └── lmdb_conversion.py │ ├── gqa │ │ ├── README.md │ │ ├── convert_gqa_to_vqa.py │ │ └── extract_vocabulary.py │ ├── tests │ │ └── generate_test_data.py │ └── visual_dialog │ │ ├── build_imdb.py │ │ └── extract_vocabulary.py └── sweeps │ ├── README.md │ ├── lib │ ├── __init__.py │ └── slurm.py │ └── sweep_visual_bert.py └── website ├── .eslintignore ├── .eslintrc.js ├── .gitignore ├── .prettierignore ├── .prettierrc ├── .stylelintrc.js ├── README.md ├── build_docs.sh ├── docs ├── challenges │ ├── hateful_memes_challenge.md │ ├── textvqa_challenge.md │ └── vqa_challenge.md ├── getting_started │ ├── faqs.md │ ├── features.md │ ├── installation.mdx │ ├── quickstart.md │ └── video_overview.md ├── notes │ ├── concepts.md │ ├── configuration.md │ ├── dataset_zoo.md │ ├── logging.md │ ├── model_zoo.md │ ├── pretrained_models.md │ ├── projects.md │ └── training_tricks.md ├── projects │ ├── butd.md │ ├── m4c.md │ ├── m4c_captioner.md │ ├── movie_mcan.md │ ├── unit.md │ ├── uniter.md │ ├── vilt.md │ └── vinvl.md └── tutorials │ ├── checkpointing.md │ ├── concat_bert_tutorial.md │ ├── dataset.md │ ├── image_feature_extraction.md │ ├── image_feature_extraction_vinvl.md │ ├── losses.md │ ├── metrics.md │ ├── processors.md │ ├── pytorchvideo.md │ └── slurm.md ├── docusaurus.config.js ├── package.json ├── sidebars.js ├── src ├── css │ └── custom.css └── pages │ ├── api_redirect │ └── index.js │ ├── index.js │ └── styles.module.css ├── static ├── .circleci │ └── config.yml ├── .nojekyll ├── CNAME └── img │ ├── banner_logo.svg │ ├── boilerplate.svg │ ├── boilerplate_white.svg │ ├── 
favicon.png │ ├── logo.png │ ├── logo.svg │ ├── logo_white_f.png │ ├── logo_white_f.svg │ ├── logo_white_text.svg │ ├── oss_logo.png │ ├── puzzle_pieces.svg │ ├── puzzle_pieces_white.svg │ ├── pytorch_logo.svg │ ├── pytorch_logo_white.svg │ ├── undraw_docusaurus_react.svg │ └── undraw_docusaurus_tree.svg └── yarn.lock

/.editorconfig:
--------------------------------------------------------------------------------
root = true

[*.py]
charset = utf-8
trim_trailing_whitespace = true
end_of_line = lf
insert_final_newline = true
indent_style = space
indent_size = 4

[*.md]
trim_trailing_whitespace = false

--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
# This is an example .flake8 config used when developing *Black* itself.

[flake8]
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4,B9
ignore = E203, E266, C901, C408, W503

--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Code of Conduct

Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
# require an issue template to be chosen
blank_issues_enabled: false

contact_links:
  - name: MMF Documentation
    url: https://mmf.sh/docs
    about: Check if your issue is already answered in the docs

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
---
name: "\U0001F680Feature Request"
about: Submit a proposal/request for a new MMF feature

---

## 🚀 Feature

## Motivation

## Pitch

## Alternatives

## Additional context

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/questions-help-support.md:
--------------------------------------------------------------------------------
---
name: "❓Questions/Help/Support"
about: Do you need support?

---

## ❓ Questions and Help

--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
Thanks for your contribution!

If you're sending a large PR (e.g., >50 lines), please open an issue first about
the feature/bug, and indicate how you want to contribute.

Follow the [contributing guidelines](https://github.com/facebookresearch/mmf/tree/main/.github/CONTRIBUTING.md) before opening the PR so that it matches MMF style guidelines.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.log
*.err
*.pyc
*.swp
.idea/*
**/__pycache__/*
**/output/*
data/.DS_Store
docs/build
results/*
build
dist
boards/*
*.egg-info/
checkpoint
*.pth
*.ckpt
*_cache
.cache
data
save
*.eggs
.eggs
eggs/
*.egg
.DS_Store
.vscode
.vscode/*
*.so
*-checkpoint.ipynb
!tests/data

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include requirements.txt
include LICENSE
include NOTICES
recursive-include mmf/configs/ *.yaml
recursive-include projects/ *.yaml

--------------------------------------------------------------------------------
/PACKAGE:
--------------------------------------------------------------------------------
load("@fbcode_macros//build_defs/lib:third_party.bzl", "third_party")

third_party.gen_overrides({"pypi/transformers": "3.4.0-transitional"})

--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
# Needed to ignore pytorch_sphinx_theme requirement clone
src

--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = mmf
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

--------------------------------------------------------------------------------
/docs/license_header.txt:
--------------------------------------------------------------------------------
Copyright (c) Facebook, Inc. and its affiliates.

--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
recommonmark==0.5.0
sphinx
sphinx_rtd_theme==0.4.3
sphinxcontrib-programoutput==0.16
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme

--------------------------------------------------------------------------------
/docs/source/_static/images/chevron-right-orange.svg:
--------------------------------------------------------------------------------
[SVG icon: markup not preserved in this text dump; only the stray text nodes "Page 1" and "Created with Sketch." survived]
--------------------------------------------------------------------------------
/docs/source/_static/images/chevron_blue.svg:
--------------------------------------------------------------------------------
[SVG icon: markup not preserved in this text dump; only the stray text nodes "Page 1" and "Created with Sketch." survived]

--------------------------------------------------------------------------------
/docs/source/_static/images/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/docs/source/_static/images/favicon.png

--------------------------------------------------------------------------------
/docs/source/_static/images/mmf_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/docs/source/_static/images/mmf_logo.png

--------------------------------------------------------------------------------
/docs/source/_static/images/view-page-source-icon.svg:
--------------------------------------------------------------------------------
[SVG icon: markup not preserved in this text dump]

--------------------------------------------------------------------------------
/docs/source/_static/js/ga.js:
--------------------------------------------------------------------------------
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());

gtag('config', 'UA-135079836-3');

--------------------------------------------------------------------------------
/docs/source/_static/js/redirect.js:
--------------------------------------------------------------------------------
// Redirect for older pythia documentation
(function(l) {
  if (window.location.href.indexOf('readthedocs') !== -1) {
    window.location.href = "https://mmf.sh/api";
  }
}(window.location));

--------------------------------------------------------------------------------
/docs/source/_templates/theme_variables.jinja:
--------------------------------------------------------------------------------
{%-
set external_urls = {
  'github': 'https://github.com/facebookresearch/mmf',
  'github_issues': 'https://github.com/facebookresearch/mmf/issues',
  'contributing': 'https://github.com/facebookresearch/mmf/blob/main/CONTRIBUTING.md',
  'api': 'https://mmf.sh/api',
  'docs': 'https://mmf.sh/docs',
  'previous_pytorch_versions': 'https://mmf.sh/previous-versions/',
  'home': 'https://mmf.sh/',
  'get_started': 'https://mmf.sh/docs',
  'features': 'https://mmf.sh/docs/getting_started/features',
  'brand_guidelines': 'https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf'
}
-%}
{%-
set og = {
  'description': 'API docs for MMF. MMF is a modular framework powered by PyTorch for multimodal vision and language research from Facebook AI Research'
}
-%}

--------------------------------------------------------------------------------
/docs/source/lib/common/registry.rst:
--------------------------------------------------------------------------------
common.registry
===============

.. automodule:: mmf.common.registry
    :members:
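
The registry documented above is MMF's central hub: models, datasets, processors, and metrics register themselves into it under string keys and are looked up by those keys at build time. As a quick orientation, a minimal usage sketch follows; "MyModel" and the "my_model" key are placeholders, not names from the repo.

from mmf.common.registry import registry
from mmf.models.base_model import BaseModel

@registry.register_model("my_model")  # "my_model" is a placeholder key
class MyModel(BaseModel):
    def __init__(self, config):
        super().__init__(config)

# Later lookups resolve through the same registry:
model_cls = registry.get_model_class("my_model")
assert model_cls is MyModel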
--------------------------------------------------------------------------------
/docs/source/lib/common/sample.rst:
--------------------------------------------------------------------------------
common.sample
===============

.. automodule:: mmf.common.sample
    :members:
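
Sample and SampleList, documented above, are the containers that every dataset below collates into. A short sketch of the documented behavior (the tensor values are made up for illustration):

import torch
from mmf.common.sample import Sample, SampleList

s1 = Sample({"text": torch.tensor([1, 2, 3])})
s2 = Sample({"text": torch.tensor([4, 5, 6])})

batch = SampleList([s1, s2])   # collates field-wise across samples
print(batch.text.shape)        # torch.Size([2, 3])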
--------------------------------------------------------------------------------
/docs/source/lib/datasets/base_dataset.rst:
--------------------------------------------------------------------------------
datasets.base_dataset
=====================

.. automodule:: mmf.datasets.base_dataset
    :members:
    :private-members:

--------------------------------------------------------------------------------
/docs/source/lib/datasets/base_dataset_builder.rst:
--------------------------------------------------------------------------------
datasets.base_dataset_builder
=============================

.. automodule:: mmf.datasets.base_dataset_builder
    :members:
    :private-members:

--------------------------------------------------------------------------------
/docs/source/lib/datasets/processors.rst:
--------------------------------------------------------------------------------
datasets.processors
===================

.. automodule:: mmf.datasets.processors.processors
    :members:
    :private-members:

.. automodule:: mmf.datasets.processors.image_processors
    :members:
    :private-members:

.. automodule:: mmf.datasets.processors.bert_processors
    :members:
    :private-members:

--------------------------------------------------------------------------------
/docs/source/lib/models/base_model.rst:
--------------------------------------------------------------------------------
models.base_model
=================

.. automodule:: mmf.models.base_model
    :members:

--------------------------------------------------------------------------------
/docs/source/lib/modules/losses.rst:
--------------------------------------------------------------------------------
modules.losses
===============

.. automodule:: mmf.modules.losses
    :members:

--------------------------------------------------------------------------------
/docs/source/lib/modules/metrics.rst:
--------------------------------------------------------------------------------
modules.metrics
===============

.. automodule:: mmf.modules.metrics
    :members:

--------------------------------------------------------------------------------
/docs/source/lib/utils/text.rst:
--------------------------------------------------------------------------------
utils.text
===============

.. automodule:: mmf.utils.text
    :members:

--------------------------------------------------------------------------------
/mmf/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
# isort:skip_file
# flake8: noqa: F401
from mmf.utils.patch import patch_transformers

patch_transformers()

from mmf import common, datasets, models, modules, utils
from mmf.modules import losses, metrics, optimizers, poolers, schedulers
from mmf.version import __version__


__all__ = [
    "utils",
    "common",
    "modules",
    "datasets",
    "models",
    "losses",
    "poolers",
    "schedulers",
    "optimizers",
    "metrics",
]

--------------------------------------------------------------------------------
/mmf/common/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
from .meter import Meter
from .registry import registry
from .sample import Sample, SampleList


__all__ = ["Sample", "SampleList", "Meter", "registry"]

--------------------------------------------------------------------------------
/mmf/common/batch_collator.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
from mmf.common.sample import convert_batch_to_sample_list


class BatchCollator:
    def __init__(self, dataset_name, dataset_type):
        self._dataset_name = dataset_name
        self._dataset_type = dataset_type

    def __call__(self, batch):
        sample_list = convert_batch_to_sample_list(batch)
        sample_list.dataset_name = self._dataset_name
        sample_list.dataset_type = self._dataset_type
        return sample_list
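
BatchCollator above is shaped like a standard PyTorch collate_fn, so a plausible wiring looks like the sketch below; my_dataset is a stand-in for any dataset yielding Sample objects and is not defined anywhere in this dump.

from torch.utils.data import DataLoader

collator = BatchCollator(dataset_name="vqa2", dataset_type="train")
loader = DataLoader(my_dataset, batch_size=32, collate_fn=collator)

for sample_list in loader:
    # Each batch arrives as a SampleList tagged with its origin.
    print(sample_list.dataset_name, sample_list.dataset_type)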
--------------------------------------------------------------------------------
/mmf/common/typings.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class PerSetAttributeType:
    train: List[str]
    val: List[str]
    test: List[str]


@dataclass
class ProcessorConfigType:
    type: str
    params: Dict[str, Any]


@dataclass
class MMFDatasetConfigType:
    data_dir: str
    use_images: bool
    use_features: bool
    zoo_requirements: List[str]
    images: PerSetAttributeType
    features: PerSetAttributeType
    annotations: PerSetAttributeType
    processors: Dict[str, ProcessorConfigType]

--------------------------------------------------------------------------------
/mmf/configs/datasets/clevr/defaults.yaml:
--------------------------------------------------------------------------------
dataset_config:
  clevr:
    data_dir: ${env.data_dir}
    data_folder: CLEVR_v1.0
    build_attributes:
      min_count: 1
      split_regex: " "
      keep:
      - ";"
      - ","
      remove:
      - "?"
      - "."
    processors:
      text_processor:
        type: vocab
        params:
          max_length: 10
          vocab:
            type: random
            vocab_file: vocabs/clevr_question_vocab.txt
          preprocessor:
            type: simple_sentence
            params: {}
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          # Vocab file is relative to [data_dir]/[data_folder]
          vocab_file: vocabs/clevr_answer_vocab.txt
          preprocessor:
            type: simple_word
            params: {}

--------------------------------------------------------------------------------
/mmf/configs/datasets/conceptual_captions/train_small.yaml:
--------------------------------------------------------------------------------
dataset_config:
  conceptual_captions:
    annotations:
      train:
      - cc/defaults/annotations/train_small.npy

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/bert.yaml:
--------------------------------------------------------------------------------
dataset_config:
  hateful_memes:
    processors:
      text_processor:
        type: bert_tokenizer
        params:
          tokenizer_config:
            type: bert-base-uncased
            params:
              do_lower_case: true
          mask_probability: 0
          max_seq_length: 128

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/attack_vectors.yaml:
--------------------------------------------------------------------------------
includes:
- ./with_features.yaml

dataset_config:
  hateful_memes:
    fg_dataset_type: attack
    is_multilabel: true
    processors:
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          vocab_file: hateful_memes/fine_grained/labels/attack_vocab.txt
          preprocessor:
            type: simple_word
            params: {}

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/defaults.yaml:
--------------------------------------------------------------------------------
dataset_config:
  hateful_memes:
    zoo_requirements:
    - hateful_memes.defaults
    - hateful_memes.fine_grained
    annotations:
      train:
      - hateful_memes/fine_grained/annotations/train_fg.jsonl
      val:
      - hateful_memes/fine_grained/annotations/dev_seen_fg.jsonl
      test:
      - hateful_memes/fine_grained/annotations/test_seen_fg.jsonl

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/hateful_pc_attack.yaml:
--------------------------------------------------------------------------------
includes:
- ./with_features.yaml

dataset_config:
  hateful_memes:
    fg_dataset_type: hateful_pc_attack
    is_multilabel: true
    processors:
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          vocab_file: hateful_memes/fine_grained/labels/hateful_pc_attack_vocab.txt
          preprocessor:
            type: simple_word
            params: {}

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/pc_attack.yaml:
--------------------------------------------------------------------------------
includes:
- ./with_features.yaml

dataset_config:
  hateful_memes:
    fg_dataset_type: pc_attack
    is_multilabel: true
    processors:
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          vocab_file: hateful_memes/fine_grained/labels/pc_attack_vocab.txt
          preprocessor:
            type: simple_word
            params: {}
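
The fine-grained configs above all follow the same composition pattern: "includes:" pulls in a base file, and the keys beneath it override the merged result. MMF builds its configuration system on OmegaConf, so the semantics are roughly the sketch below; this is a simplification (MMF's actual loader resolves the includes: key and relative paths itself), and the two file names are simply the ones above.

from omegaconf import OmegaConf

base = OmegaConf.load("with_features.yaml")
override = OmegaConf.load("pc_attack.yaml")

# Later configs win on conflicting keys, i.e. include-then-override.
config = OmegaConf.merge(base, override)
print(config.dataset_config.hateful_memes.fg_dataset_type)  # pc_attack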
--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/protected_groups.yaml:
--------------------------------------------------------------------------------
includes:
- ./with_features.yaml

dataset_config:
  hateful_memes:
    fg_dataset_type: pc
    is_multilabel: true
    processors:
      answer_processor:
        type: multi_hot_answer_from_vocab
        params:
          num_answers: 1
          vocab_file: hateful_memes/fine_grained/labels/pc_vocab.txt
          preprocessor:
            type: simple_word
            params: {}

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/fine_grained/with_features.yaml:
--------------------------------------------------------------------------------
includes:
- ./defaults.yaml

dataset_config:
  hateful_memes:
    use_images: false
    use_features: true
    # Disable this in your config if you do not need features info
    # and are running out of memory
    return_features_info: true

--------------------------------------------------------------------------------
/mmf/configs/datasets/hateful_memes/with_features.yaml:
--------------------------------------------------------------------------------
dataset_config:
  hateful_memes:
    use_images: false
    use_features: true
    # Disable this in your config if you do not need features info
    # and are running out of memory
    return_features_info: true

--------------------------------------------------------------------------------
/mmf/configs/datasets/mmimdb/with_features.yaml:
--------------------------------------------------------------------------------
dataset_config:
  mmimdb:
    use_images: false
    use_features: true
    # Disable this in your config if you do not need features info
    # and are running out of memory
    return_features_info: false

--------------------------------------------------------------------------------
/mmf/configs/datasets/textvqa/with_resnet.yaml:
--------------------------------------------------------------------------------
dataset_config:
  textvqa:
    features:
      train:
      - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/defaults/features/open_images/resnet152.lmdb
      val:
      - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/defaults/features/open_images/resnet152.lmdb
      test:
      - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/defaults/features/open_images/resnet152.lmdb

--------------------------------------------------------------------------------
/mmf/configs/datasets/vinvl/defaults.yaml:
--------------------------------------------------------------------------------
includes:
- ../vqa2/defaults.yaml

dataset_config:
  vinvl:
    base_dataset_name: vqa2
    label_map: /private/home/ryanjiang/winoground/pretrained_models/VG-SGG-dicts-vgoi6-clipped.json
    base_dataset: ${dataset_config.vqa2}
    processors:
      text_processor:
        type: vinvl_text_tokenizer
        params:
          mask_probability: 0

--------------------------------------------------------------------------------
/mmf/configs/datasets/vqa2/with_raw_images.yaml:
--------------------------------------------------------------------------------
dataset_config:
  vqa2:
    use_images: true
    use_features: false
    processors:
      image_processor:
        type: torchvision_transforms
        params:
          transforms:
          - type: Resize
            params:
              size: [256, 256]
          - type: CenterCrop
            params:
              size: [224, 224]
          - ToTensor
          - GrayScaleTo3Channels
          - type: Normalize
            params:
              mean: [0.46777044, 0.44531429, 0.40661017]
              std: [0.12221994, 0.12145835, 0.14380469]
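
The torchvision_transforms processor above composes standard torchvision transforms by name, in order. An approximate plain-torchvision equivalent for reference; GrayScaleTo3Channels is MMF-specific, so it is stubbed with a Lambda here, and the sizes and statistics are copied from the config:

import torchvision.transforms as T

transform = T.Compose([
    T.Resize([256, 256]),
    T.CenterCrop([224, 224]),
    T.ToTensor(),
    # Stand-in for MMF's GrayScaleTo3Channels: repeat 1-channel tensors to 3.
    T.Lambda(lambda x: x.expand(3, -1, -1) if x.size(0) == 1 else x),
    T.Normalize(mean=[0.46777044, 0.44531429, 0.40661017],
                std=[0.12221994, 0.12145835, 0.14380469]),
])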
--------------------------------------------------------------------------------
/mmf/configs/models/ban/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  ban:
    losses:
    - type: logit_bce
    text_embedding:
      num_hidden: 1280
      vocab_size: 1280
      emb_size: 300
      num_layers: 1
      dropout: 0.0
      bidirectional: False
      rnn_type: 'GRU'
    bilinear_attention:
      bc_net:
        k: 1
        dropout: [0.2, 0.5]
        h_out:
      fc_net:
        dims: 600
        activation:
        dropout: 0.2
      gamma: 4
      visual_feat_dim: 2048
    classifier:
      # out dim will be taken from registry as set by dataset builder
      hidden_size: 600
      dropout: 0.5

--------------------------------------------------------------------------------
/mmf/configs/models/butd/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  butd: &butd
    model_data_dir: ${env.data_dir}
    losses:
    - type: caption_cross_entropy
    classifier:
      type: language_decoder
      params:
        dropout: 0.5
        hidden_dim: 1024
        feature_dim: 2048
        fc_bias_init: 0
    image_feature_embeddings:
    - modal_combine:
        type: top_down_attention_lstm
        params:
          dropout: 0.5
          hidden_dim: 1024
          attention_dim: 1024
      normalization: softmax
      transform:
        type: linear
        params:
          out_dim: 1
    image_feature_dim: 2048
    embedding_dim: 300
    image_feature_encodings:
    - type: finetune_faster_rcnn_fpn_fc7
      params:
        bias_file: models/detectron.defaults/fc7_b.pkl
        weights_file: models/detectron.defaults/fc7_w.pkl
        model_data_dir: ${model_config.butd.model_data_dir}
    inference:
      type: greedy

--------------------------------------------------------------------------------
/mmf/configs/models/cnn_lstm/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  cnn_lstm:
    losses:
    - type: logit_bce
    text_embedding:
      embedding_dim: 20
    lstm:
      input_size: 20
      hidden_size: 50
      bidirectional: true
      batch_first: true
    cnn:
      layers:
        input_dims: [3, 64, 128, 128, 64, 64]
        output_dims: [64, 128, 128, 64, 64, 10]
        kernel_sizes: [7, 5, 5, 5, 5, 1]
    classifier:
      type: mlp
      params:
        in_dim: 450
        out_dim: 2
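
The cnn.layers block above encodes a convolution stack as three parallel lists. Purely as an illustration of how such parallel lists zip into modules (this is not MMF's actual builder code, and the activation choice is arbitrary):

import torch.nn as nn

input_dims = [3, 64, 128, 128, 64, 64]
output_dims = [64, 128, 128, 64, 64, 10]
kernel_sizes = [7, 5, 5, 5, 5, 1]

layers = []
for in_c, out_c, k in zip(input_dims, output_dims, kernel_sizes):
    # One conv per (input_dim, output_dim, kernel_size) triple.
    layers += [nn.Conv2d(in_c, out_c, kernel_size=k), nn.ReLU()]
cnn = nn.Sequential(*layers)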
--------------------------------------------------------------------------------
/mmf/configs/models/fusions/defaults.yaml:
--------------------------------------------------------------------------------
includes:
- ./concat_bert.yaml

--------------------------------------------------------------------------------
/mmf/configs/models/lxmert/pretrain.yaml:
--------------------------------------------------------------------------------
includes:
- configs/models/lxmert/defaults.yaml

--------------------------------------------------------------------------------
/mmf/configs/models/m4c/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  m4c:
    lr_scale_frcn: 0.1
    lr_scale_text_bert: 0.1
    lr_scale_mmt: 1.0  # no scaling
    text_bert_init_from_bert_base: true
    text_bert:
      num_hidden_layers: 3
    obj:
      mmt_in_dim: 2048
      dropout_prob: 0.1
    ocr:
      mmt_in_dim: 3002  # 300 (FastText) + 604 (PHOC) + 2048 (Faster R-CNN) + 50 (all zeros; legacy)
      dropout_prob: 0.1
    mmt:
      hidden_size: 768
      num_hidden_layers: 4
    classifier:
      type: linear
      ocr_max_num: 50
      ocr_ptr_net:
        hidden_size: 768
        query_key_size: 768
      params: {}
    model_data_dir: ${env.data_dir}
    losses:
    - type: m4c_decoding_bce_with_mask

--------------------------------------------------------------------------------
/mmf/configs/models/m4c_captioner/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  m4c_captioner:
    lr_scale_frcn: 0.1
    lr_scale_text_bert: 0.1
    lr_scale_mmt: 1.0  # no scaling
    text_bert_init_from_bert_base: true
    text_bert:
      num_hidden_layers: 3
    obj:
      mmt_in_dim: 2048
      dropout_prob: 0.1
    ocr:
      mmt_in_dim: 3002  # 300 (FastText) + 604 (PHOC) + 2048 (Faster R-CNN) + 50 (all zeros; legacy)
      dropout_prob: 0.1
    mmt:
      hidden_size: 768
      num_hidden_layers: 4
    classifier:
      type: linear
      ocr_max_num: 50
      ocr_ptr_net:
        hidden_size: 768
        query_key_size: 768
      params: {}
    model_data_dir: ${env.data_dir}
    losses:
    - type: m4c_decoding_bce_with_mask
    remove_unk_in_pred: true

--------------------------------------------------------------------------------
/mmf/configs/models/mmbt/classification.yaml:
--------------------------------------------------------------------------------
model_config:
  mmbt:
    training_head_type: classification
    num_labels: 2
    losses:
    - type: cross_entropy

--------------------------------------------------------------------------------
/mmf/configs/models/mmbt/pretrain.yaml:
--------------------------------------------------------------------------------
includes:
- ./defaults.yaml

--------------------------------------------------------------------------------
/mmf/configs/models/mmbt/with_features.yaml:
--------------------------------------------------------------------------------
model_config:
  mmbt:
    model_data_dir: ${env.data_dir}
    direct_features_input: true
    modal_encoder:
      type: finetune_faster_rcnn_fpn_fc7
      params:
        in_dim: 2048
        bias_file: models/detectron.defaults/fc7_b.pkl
        weights_file: models/detectron.defaults/fc7_w.pkl
        model_data_dir: ${model_config.mmbt.model_data_dir}

--------------------------------------------------------------------------------
/mmf/configs/models/mmf_transformer/pretrain.yaml:
--------------------------------------------------------------------------------
includes:
- configs/models/mmf_transformer/defaults.yaml

model_config:
  mmf_transformer:
    heads:
    - type: mlm
      freeze: false
      lr_multiplier: 1.0
      # default for bert base
      hidden_size: 768
      # default vocab size for bert base
      vocab_size: 30522
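
For orientation, the mlm head configured above maps transformer hidden states to vocabulary logits (hidden_size and vocab_size are the bert-base defaults, as the comments note). A bare-bones stand-in with the same dimensions, purely illustrative rather than MMF's actual head implementation:

import torch.nn as nn

class ToyMLMHead(nn.Module):
    """Minimal masked-language-modeling head: hidden states -> vocab logits."""

    def __init__(self, hidden_size=768, vocab_size=30522):
        super().__init__()
        self.transform = nn.Linear(hidden_size, hidden_size)
        self.act = nn.GELU()
        self.decoder = nn.Linear(hidden_size, vocab_size)

    def forward(self, hidden_states):
        return self.decoder(self.act(self.transform(hidden_states)))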
--------------------------------------------------------------------------------
/mmf/configs/models/movie_mcan/defaults.yaml:
--------------------------------------------------------------------------------
model_config:
  movie_mcan:
    model_data_dir: ${env.data_dir}
    classifier:
      type: triple_linear
      params: {}
    image_feature_embeddings:
      type: two_branches
      params:
        hidden_dim: 1024
        cond_dim: 1024
        num_attn: 8
        dropout: 0.1
        num_layers: 6
        cbn_num_layers: 4
    image_feature_dim: 2048
    image_feature_encodings:
      type: default
      params:
        model_data_dir: ${model_config.movie_mcan.model_data_dir}
        cond_features: 1024
        in_dim: ${model_config.movie_mcan.image_feature_dim}
    text_embeddings:
      type: mcan
      params:
        hidden_dim: 1024
        embedding_dim: 300
        num_attn: 8
        dropout: 0.1
        num_layers: 6
        num_attn_pool: 1
        num_feat: 2
        model_data_dir: ${model_config.movie_mcan.model_data_dir}

--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/bert.yaml:
--------------------------------------------------------------------------------
model_config:
  unimodal_text:
    bert_model_name: bert-base-uncased
    text_hidden_size: 768
    num_labels: 2
    text_encoder:
      type: transformer
      params:
        bert_model_name: ${model_config.unimodal_text.bert_model_name}
        hidden_size: 768
        num_hidden_layers: 12
        num_attention_heads: 12
        output_attentions: false
        output_hidden_states: false

    classifier:
      params:
        in_dim: 768

--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/image.yaml:
--------------------------------------------------------------------------------
model_config:
  unimodal_image:
    # Either pretraining or classification
    direct_features_input: false
    freeze_base: false
    finetune_lr_multiplier: 1
    # Dimension of the embedding finally returned by the modal encoder
    modal_hidden_size: 2048
    # Used when classification head is activated
    num_labels: 2
    modal_encoder:
      type: resnet152
      params:
        pretrained: true
        pool_type: avg
        num_output_features: 1

    classifier:
      type: mlp
      params:
        in_dim: 2048
        out_dim: 2
        hidden_dim: 768
        num_layers: 0

--------------------------------------------------------------------------------
/mmf/configs/models/unimodal/text.yaml:
--------------------------------------------------------------------------------
model_config:
  unimodal_text:
    # Either pretraining or classification
    bert_model_name: bert-base-uncased
    freeze_base: false
    finetune_lr_multiplier: 1
    # Dimension of the embedding finally returned by the text encoder
    text_hidden_size: 300
    # Used when classification head is activated
    num_labels: 2
    text_encoder:
      type: embedding
      params:
        operator: sum
        embedding_params:
          type: vocab
          params:
            type: intersected
            embedding_name: glove.6B.300d
            embedding_dim: 300
            data_dir: ${env.data_dir}
            vocab_file: vocabs/vocabulary_100k.txt

    classifier:
      type: mlp
      params:
        in_dim: 300
        out_dim: 2
        hidden_dim: 768
        num_layers: 0
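
One detail worth flagging in the two unimodal classifier blocks above: with num_layers: 0 an MLP has no hidden layers, so the head effectively reduces to a single in_dim -> out_dim projection and hidden_dim: 768 plays no role. Roughly, for the text config (illustrative only; MMF's real mlp classifier layer may wrap this in dropout or normalization):

import torch.nn as nn

# num_layers: 0  ->  a single projection, in_dim=300 to out_dim=2
classifier = nn.Linear(300, 2)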
unimodal_image: 3 | model_data_dir: ${env.data_dir} 4 | direct_features_input: true 5 | modal_encoder: 6 | type: finetune_faster_rcnn_fpn_fc7 7 | params: 8 | in_dim: 2048 9 | bias_file: models/detectron.defaults/fc7_b.pkl 10 | weights_file: models/detectron.defaults/fc7_w.pkl 11 | model_data_dir: ${model_config.unimodal_image.model_data_dir} 12 | num_output_features: 1 13 | -------------------------------------------------------------------------------- /mmf/configs/models/uniter/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | uniter: 3 | heads: 4 | vqa2: 5 | type: mlp 6 | freeze: false 7 | lr_multiplier: 1.0 8 | in_dim: 768 9 | hidden_size: 1536 10 | num_labels: 3129 11 | pooler_name: bert_pooler 12 | text_embeddings: 13 | type: bert_embeddings 14 | image_embeddings: 15 | type: uniter_image_embeddings 16 | params: 17 | name: 'uniter_image_embeddings' 18 | encoder: 19 | type: transformer 20 | params: 21 | bert_model_name: bert-base-uncased 22 | hidden_size: 768 23 | num_hidden_layers: 12 24 | num_attention_heads: 12 25 | output_attentions: false 26 | output_hidden_states: false 27 | tasks: 28 | - vqa2 29 | -------------------------------------------------------------------------------- /mmf/configs/models/vilbert/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/vilbert/defaults.yaml 3 | -------------------------------------------------------------------------------- /mmf/configs/models/vinvl/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | vinvl: 3 | heads: 4 | test: 5 | type: mlp 6 | freeze: false 7 | lr_multiplier: 1.0 8 | in_dim: 768 9 | hidden_size: 1536 10 | num_labels: 3129 11 | pooler_name: bert_pooler 12 | bert_model_name: bert-base-uncased 13 | loss_type: sfmx 14 | img_feature_dim: 2054 15 | img_feature_type: 'frcnn' 16 | use_img_layernorm: 1 17 | img_layer_norm_eps: 1e-12 18 | max_img_seq_len: 70 19 | -------------------------------------------------------------------------------- /mmf/configs/models/visual_bert/classification.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | visual_bert: 3 | training_head_type: classification 4 | -------------------------------------------------------------------------------- /mmf/configs/models/visual_bert/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | visual_bert: 3 | bert_model_name: bert-base-uncased 4 | training_head_type: pretraining 5 | visual_embedding_dim: 2048 6 | special_visual_initialize: true 7 | embedding_strategy: plain 8 | bypass_transformer: false 9 | output_attentions: false 10 | output_hidden_states: false 11 | random_initialize: false 12 | freeze_base: false 13 | finetune_lr_multiplier: 1 14 | # Default points to BERT pooler strategy which is to take 15 | # representation of CLS token after passing it through a dense layer 16 | pooler_strategy: default 17 | zerobias: false # Initialize last layer to predict closer to 0 on init for sigmoid outputs 18 | -------------------------------------------------------------------------------- /mmf/configs/models/visual_bert/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/visual_bert/defaults.yaml 3 | 
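4 | # No overrides are needed here: defaults.yaml (included above) already sets 5 | # training_head_type: pretraining, so the include alone configures pretraining.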
-------------------------------------------------------------------------------- /mmf/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import processors 3 | from .base_dataset import BaseDataset 4 | from .base_dataset_builder import BaseDatasetBuilder 5 | from .concat_dataset import ConcatDataset 6 | from .lightning_multi_datamodule import LightningMultiDataModule 7 | from .lightning_multi_dataset_loader import LightningMultiDataLoader 8 | from .mmf_dataset import MMFDataset 9 | from .mmf_dataset_builder import MMFDatasetBuilder 10 | from .multi_dataset_loader import MultiDatasetLoader 11 | 12 | 13 | __all__ = [ 14 | "processors", 15 | "BaseDataset", 16 | "BaseDatasetBuilder", 17 | "ConcatDataset", 18 | "MultiDatasetLoader", 19 | "MMFDataset", 20 | "MMFDatasetBuilder", 21 | "LightningMultiDataModule", 22 | "LightningMultiDataLoader", 23 | ] 24 | -------------------------------------------------------------------------------- /mmf/datasets/builders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/airstore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.utils.env import import_files 4 | 5 | 6 | import_files(__file__, "mmf.datasets.builders.airstore") 7 | -------------------------------------------------------------------------------- /mmf/datasets/builders/airstore/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.airstore.dataset import AirstoreDataset 5 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 6 | 7 | 8 | @registry.register_builder("airstore") 9 | class AirstoreDatasetBuilder(MMFDatasetBuilder): 10 | def __init__( 11 | self, dataset_name="airstore", dataset_class=AirstoreDataset, *args, **kwargs 12 | ): 13 | # Forward the configured dataset class instead of silently discarding it. 14 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 15 | 16 | @classmethod 17 | def config_path(cls): 18 | return "configs/datasets/airstore/defaults.yaml" 19 | -------------------------------------------------------------------------------- /mmf/datasets/builders/charades/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/charades/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/charades/builder.py: -------------------------------------------------------------------------------- 1 | from mmf.common.registry import registry 2 | from mmf.datasets.builders.charades.dataset import CharadesDataset 3 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 4 | 5 | 6 | @registry.register_builder("charades") 7 | class CharadesBuilder(MMFDatasetBuilder): 8 | def __init__( 9 | self, dataset_name="charades", dataset_class=CharadesDataset, *args, **kwargs 10 | ): 11 | # Forward the configured dataset class instead of silently discarding it. 12 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/charades/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/clevr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/clevr/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/coco/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = [ 3 | "COCOBuilder", 4 | "COCODataset", 5 | "DetectionCOCOBuilder", 6 | "DetectionCOCODataset", 7 | "MaskedCOCOBuilder", 8 | "MaskedCOCODataset", 9 | ] 10 | 11 | from .builder import COCOBuilder 12 | from .dataset import COCODataset 13 | from .detection_builder import DetectionCOCOBuilder 14 | from .detection_dataset import DetectionCOCODataset 15 | from .masked_builder import MaskedCOCOBuilder 16 | from .masked_dataset import MaskedCOCODataset 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/coco/detection_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.coco.detection_dataset import DetectionCOCODataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("detection_coco") 8 | class DetectionCOCOBuilder(MMFDatasetBuilder): 9 | def __init__(self): 10 | super().__init__( 11 | dataset_name="detection_coco", dataset_class=DetectionCOCODataset 12 | ) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/coco/detection.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/coco/masked_builder.py: -------------------------------------------------------------------------------- 1 | from mmf.common.registry import registry 2 | from mmf.datasets.builders.coco.builder import COCOBuilder 3 | 4 | from .masked_dataset import MaskedCOCODataset 5 | 6 | 7 | @registry.register_builder("masked_coco") 8 | class MaskedCOCOBuilder(COCOBuilder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "masked_coco" 12 | self.set_dataset_class(MaskedCOCODataset) 13 | 14 | def update_registry_for_model(self, config): 15 | registry.register( 16 | self.dataset_name + "_text_vocab_size", 17 | self.dataset.masked_token_processor.get_vocab_size(), 18 | ) 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/coco/masked.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/coco2017/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/coco2017/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/coco2017/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.coco2017.masked_dataset import MaskedCoco2017Dataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("masked_coco2017") 8 | class MaskedCoco2017Builder(MMFDatasetBuilder): 9 | def __init__( 10 | self, 11 | dataset_name="masked_coco2017", 12 | dataset_class=MaskedCoco2017Dataset, 13 | *args, 14 | **kwargs, 15 | ): 16 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 17 | 18 | @classmethod 19 | def config_path(cls): 20 | return "configs/datasets/coco2017/masked.yaml" 21 | -------------------------------------------------------------------------------- /mmf/datasets/builders/coco2017/masked_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
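# Masked pretraining on COCO 2017 reuses the Localized Narratives masking logic through the mixin imported below; this class only binds the dataset name.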
2 | 3 | from mmf.common.typings import MMFDatasetConfigType 4 | from mmf.datasets.builders.localized_narratives.masked_dataset import ( 5 | MaskedLocalizedNarrativesDatasetMixin, 6 | ) 7 | from mmf.datasets.mmf_dataset import MMFDataset 8 | 9 | 10 | class MaskedCoco2017Dataset(MaskedLocalizedNarrativesDatasetMixin, MMFDataset): 11 | def __init__( 12 | self, 13 | config: MMFDatasetConfigType, 14 | dataset_type: str, 15 | index: int, 16 | *args, 17 | **kwargs, 18 | ): 19 | super().__init__( 20 | "masked_coco2017", config, dataset_type, index, *args, **kwargs 21 | ) 22 | -------------------------------------------------------------------------------- /mmf/datasets/builders/conceptual_captions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = [ 3 | "ConceptualCaptionsBuilder", 4 | "ConceptualCaptionsDataset", 5 | "MaskedConceptualCaptionsBuilder", 6 | "MaskedConceptualCaptionsDataset", 7 | ] 8 | 9 | from .builder import ConceptualCaptionsBuilder 10 | from .dataset import ConceptualCaptionsDataset 11 | from .masked_builder import MaskedConceptualCaptionsBuilder 12 | from .masked_dataset import MaskedConceptualCaptionsDataset 13 | -------------------------------------------------------------------------------- /mmf/datasets/builders/conceptual_captions/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.coco import COCOBuilder 5 | 6 | from .dataset import ConceptualCaptionsDataset 7 | 8 | 9 | @registry.register_builder("conceptual_captions") 10 | class ConceptualCaptionsBuilder(COCOBuilder): 11 | def __init__(self): 12 | super().__init__() 13 | self.dataset_name = "conceptual_captions" 14 | self.set_dataset_class(ConceptualCaptionsDataset) 15 | 16 | @classmethod 17 | def config_path(cls): 18 | return "configs/datasets/conceptual_captions/defaults.yaml" 19 | -------------------------------------------------------------------------------- /mmf/datasets/builders/conceptual_captions/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.coco import MaskedCOCOBuilder 4 | 5 | from .masked_dataset import MaskedConceptualCaptionsDataset 6 | 7 | 8 | @registry.register_builder("masked_conceptual_captions") 9 | class MaskedConceptualCaptionsBuilder(MaskedCOCOBuilder): 10 | def __init__(self): 11 | super().__init__() 12 | self.dataset_name = "masked_conceptual_captions" 13 | self.set_dataset_class(MaskedConceptualCaptionsDataset) 14 | 15 | @classmethod 16 | def config_path(cls): 17 | return "configs/datasets/conceptual_captions/masked.yaml" 18 | -------------------------------------------------------------------------------- /mmf/datasets/builders/conceptual_captions/masked_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | from mmf.datasets.builders.coco import MaskedCOCODataset 4 | 5 | 6 | class MaskedConceptualCaptionsDataset(MaskedCOCODataset): 7 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 8 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 9 | self.dataset_name = "masked_conceptual_captions" 10 | self._two_sentence = config.get("two_sentence", True) 11 | self._false_caption = config.get("false_caption", True) 12 | self._two_sentence_probability = config.get("two_sentence_probability", 0.5) 13 | self._false_caption_probability = config.get("false_caption_probability", 0.5) 14 | -------------------------------------------------------------------------------- /mmf/datasets/builders/flickr30k/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/flickr30k/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/flickr30k/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.flickr30k.masked_dataset import MaskedFlickr30kDataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("masked_flickr30k") 8 | class MaskedFlickr30kBuilder(MMFDatasetBuilder): 9 | def __init__( 10 | self, 11 | dataset_name="masked_flickr30k", 12 | dataset_class=MaskedFlickr30kDataset, 13 | *args, 14 | **kwargs, 15 | ): 16 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 17 | 18 | @classmethod 19 | def config_path(cls): 20 | return "configs/datasets/flickr30k/masked.yaml" 21 | -------------------------------------------------------------------------------- /mmf/datasets/builders/flickr30k/masked_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.typings import MMFDatasetConfigType 4 | from mmf.datasets.builders.localized_narratives.masked_dataset import ( 5 | MaskedLocalizedNarrativesDatasetMixin, 6 | ) 7 | from mmf.datasets.mmf_dataset import MMFDataset 8 | 9 | 10 | class MaskedFlickr30kDataset(MaskedLocalizedNarrativesDatasetMixin, MMFDataset): 11 | def __init__( 12 | self, 13 | config: MMFDatasetConfigType, 14 | dataset_type: str, 15 | index: int, 16 | *args, 17 | **kwargs, 18 | ): 19 | super().__init__( 20 | "masked_flickr30k", config, dataset_type, index, *args, **kwargs 21 | ) 22 | -------------------------------------------------------------------------------- /mmf/datasets/builders/glue/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/gqa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | __all__ = ["GQABuilder", "GQADataset", "MaskedGQABuilder", "MaskedGQADataset"] 4 | 5 | from .builder import GQABuilder 6 | from .dataset import GQADataset 7 | from .masked_builder import MaskedGQABuilder 8 | from .masked_dataset import MaskedGQADataset 9 | -------------------------------------------------------------------------------- /mmf/datasets/builders/gqa/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.gqa.builder import GQABuilder 5 | from mmf.datasets.builders.gqa.masked_dataset import MaskedGQADataset 6 | 7 | 8 | @registry.register_builder("masked_gqa") 9 | class MaskedGQABuilder(GQABuilder): 10 | def __init__(self): 11 | super().__init__() 12 | self.dataset_name = "masked_gqa" 13 | self.dataset_class = MaskedGQADataset 14 | 15 | @classmethod 16 | def config_path(cls): 17 | return "configs/datasets/gqa/masked.yaml" 18 | -------------------------------------------------------------------------------- /mmf/datasets/builders/hateful_memes/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/localized_narratives/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/localized_narratives/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/localized_narratives/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.localized_narratives.masked_dataset import ( 4 | MaskedLocalizedNarrativesDataset, 5 | ) 6 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 7 | 8 | 9 | @registry.register_builder("masked_localized_narratives") 10 | class MaskedLocalizedNarrativesBuilder(MMFDatasetBuilder): 11 | def __init__( 12 | self, 13 | dataset_name="masked_localized_narratives", 14 | dataset_class=MaskedLocalizedNarrativesDataset, 15 | *args, 16 | **kwargs, 17 | ): 18 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/localized_narratives/masked.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/mmimdb/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/mmimdb/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | from mmf.common.registry import registry 9 | from mmf.datasets.builders.mmimdb.masked_dataset import MaskedMMImdbDataset 10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 11 | 12 | 13 | @registry.register_builder("masked_mmimdb") 14 | class MaskedMMImdbBuilder(VQA2Builder): 15 | def __init__(self): 16 | super().__init__() 17 | self.dataset_name = "masked_mmimdb" 18 | self.dataset_class = MaskedMMImdbDataset 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/mmimdb/masked.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/nlvr2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/nlvr2/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | from mmf.common.registry import registry 9 | from mmf.datasets.builders.nlvr2.dataset import NLVR2Dataset 10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 11 | 12 | 13 | @registry.register_builder("nlvr2") 14 | class NLVR2Builder(VQA2Builder): 15 | def __init__(self): 16 | super().__init__() 17 | self.dataset_name = "nlvr2" 18 | self.dataset_class = NLVR2Dataset 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/nlvr2/defaults.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/ocrvqa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/ocrvqa/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import Registry 3 | from mmf.datasets.builders.ocrvqa.dataset import OCRVQADataset 4 | from mmf.datasets.builders.textvqa.builder import TextVQABuilder 5 | 6 | 7 | @Registry.register_builder("ocrvqa") 8 | class OCRVQABuilder(TextVQABuilder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "ocrvqa" 12 | self.set_dataset_class(OCRVQADataset) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/ocrvqa/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/ocrvqa/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from mmf.datasets.builders.textvqa.dataset import TextVQADataset 3 | 4 | 5 | class OCRVQADataset(TextVQADataset): 6 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 7 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 8 | self.dataset_name = "ocrvqa" 9 | 10 | def preprocess_sample_info(self, sample_info): 11 | # Do nothing in this case 12 | return sample_info 13 | -------------------------------------------------------------------------------- /mmf/datasets/builders/okvqa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/okvqa/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/okvqa/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.okvqa.dataset import OKVQADataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("okvqa") 8 | class OKVQABuilder(MMFDatasetBuilder): 9 | def __init__( 10 | self, dataset_name="okvqa", dataset_class=OKVQADataset, *args, **kwargs 11 | ): 12 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/okvqa/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/retrieval/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | __all__ = ["RetrievalDataset", "RetrievalBuilder"] 4 | 5 | from .builder import RetrievalBuilder 6 | from .dataset import RetrievalDataset 7 | -------------------------------------------------------------------------------- /mmf/datasets/builders/retrieval/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | 9 | from mmf.common.registry import registry 10 | from mmf.datasets.builders.retrieval.dataset import RetrievalDataset 11 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 12 | 13 | 14 | @registry.register_builder("retrieval") 15 | class RetrievalBuilder(MMFDatasetBuilder): 16 | def __init__( 17 | self, dataset_name="retrieval", dataset_class=RetrievalDataset, *args, **kwargs 18 | ): 19 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 20 | 21 | @classmethod 22 | def config_path(cls): 23 | return "configs/datasets/retrieval/flickr30k_defaults.yaml" 24 | -------------------------------------------------------------------------------- /mmf/datasets/builders/sbu_captions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 3 | __all__ = ["MaskedSBUBuilder", "MaskedSBUDataset"] 4 | 5 | from .masked_builder import MaskedSBUBuilder 6 | from .masked_dataset import MaskedSBUDataset 7 | -------------------------------------------------------------------------------- /mmf/datasets/builders/sbu_captions/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.coco import MaskedCOCOBuilder 5 | 6 | from .masked_dataset import MaskedSBUDataset 7 | 8 | 9 | @registry.register_builder("masked_sbu") 10 | class MaskedSBUBuilder(MaskedCOCOBuilder): 11 | def __init__(self): 12 | super().__init__() 13 | self.dataset_name = "masked_sbu" 14 | self.set_dataset_class(MaskedSBUDataset) 15 | 16 | @classmethod 17 | def config_path(cls): 18 | return "configs/datasets/sbu_captions/masked.yaml" 19 | -------------------------------------------------------------------------------- /mmf/datasets/builders/sbu_captions/masked_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.datasets.builders.coco import MaskedCOCODataset 4 | 5 | 6 | class MaskedSBUDataset(MaskedCOCODataset): 7 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 8 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 9 | self.dataset_name = "masked_sbu" 10 | self._two_sentence = config.get("two_sentence", True) 11 | self._false_caption = config.get("false_caption", True) 12 | self._two_sentence_probability = config.get("two_sentence_probability", 0.5) 13 | self._false_caption_probability = config.get("false_caption_probability", 0.5) 14 | -------------------------------------------------------------------------------- /mmf/datasets/builders/stvqa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/stvqa/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import Registry 3 | from mmf.datasets.builders.stvqa.dataset import STVQADataset 4 | from mmf.datasets.builders.textvqa.builder import TextVQABuilder 5 | 6 | 7 | @Registry.register_builder("stvqa") 8 | class STVQABuilder(TextVQABuilder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "stvqa" 12 | self.set_dataset_class(STVQADataset) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/stvqa/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/stvqa/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
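# ST-VQA reuses the TextVQA dataset wholesale; beyond the dataset name, the only change is that feature paths are prefixed with the split directory ("train" or "test_task3") in preprocess_sample_info below.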
2 | from mmf.datasets.builders.textvqa.dataset import TextVQADataset 3 | 4 | 5 | class STVQADataset(TextVQADataset): 6 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 7 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 8 | self.dataset_name = "stvqa" 9 | 10 | def preprocess_sample_info(self, sample_info): 11 | feature_path = sample_info["feature_path"] 12 | append = "train" 13 | 14 | if self.dataset_type == "test": 15 | append = "test_task3" 16 | 17 | if not feature_path.startswith(append): 18 | feature_path = append + "/" + feature_path 19 | 20 | sample_info["feature_path"] = feature_path 21 | return sample_info 22 | -------------------------------------------------------------------------------- /mmf/datasets/builders/textcaps/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/textvqa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vinvl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = ["VinVLBuilder", "VinVLDataset"] 3 | 4 | from .builder import VinVLBuilder 5 | from .dataset import VinVLDataset 6 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_dialog/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_dialog/dataset.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | 4 | import torch 5 | from mmf.common.sample import Sample 6 | from mmf.datasets.builders.visual_dialog.database import VisualDialogDatabase 7 | from mmf.datasets.builders.vqa2 import VQA2Dataset 8 | 9 | 10 | class VisualDialogDataset(VQA2Dataset): 11 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 12 | super().__init__( 13 | config, 14 | dataset_type, 15 | imdb_file_index, 16 | dataset_name="visual_dialog", 17 | *args, 18 | **kwargs, 19 | ) 20 | 21 | discriminative = config.discriminative 22 | self._discriminative = discriminative.enabled 23 | self._return_indices = discriminative.return_indices 24 | self._no_unk = config.no_unk 25 | self._return_history = config.return_history 26 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_entailment/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_entailment/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | from mmf.common.registry import registry 9 | from mmf.datasets.builders.visual_entailment.dataset import VisualEntailmentDataset 10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 11 | 12 | 13 | @registry.register_builder("visual_entailment") 14 | class VisualEntailmentBuilder(VQA2Builder): 15 | def __init__(self): 16 | super().__init__() 17 | self.dataset_name = "visual_entailment" 18 | self.dataset_class = VisualEntailmentDataset 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/visual_entailment/defaults.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_genome/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_genome/detection_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.visual_genome.detection_dataset import ( 4 | DetectionVisualGenomeDataset, 5 | ) 6 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 7 | 8 | 9 | @registry.register_builder("detection_visual_genome") 10 | class DetectionVisualGenomeBuilder(MMFDatasetBuilder): 11 | def __init__(self): 12 | super().__init__( 13 | dataset_name="detection_visual_genome", 14 | dataset_class=DetectionVisualGenomeDataset, 15 | ) 16 | 17 | @classmethod 18 | def config_path(cls): 19 | return "configs/datasets/visual_genome/detection.yaml" 20 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_genome/detection_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.datasets.builders.coco.detection_dataset import DetectionCOCODataset 3 | 4 | 5 | class DetectionVisualGenomeDataset(DetectionCOCODataset): 6 | def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs): 7 | super().__init__(config, dataset_type, imdb_file_index, *args, **kwargs) 8 | if "name" in kwargs: 9 | name = kwargs["name"] 10 | elif "dataset_name" in kwargs: 11 | name = kwargs["dataset_name"] 12 | else: 13 | name = "detection_visual_genome" 14 | self.dataset_name = name 15 | -------------------------------------------------------------------------------- /mmf/datasets/builders/visual_genome/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.builders.visual_genome.builder import VisualGenomeBuilder 5 | from mmf.datasets.builders.visual_genome.masked_dataset import MaskedVisualGenomeDataset 6 | 7 | 8 | @registry.register_builder("masked_visual_genome") 9 | class MaskedVisualGenomeBuilder(VisualGenomeBuilder): 10 | def __init__(self): 11 | super().__init__() 12 | self.dataset_name = "masked_visual_genome" 13 | self.dataset_class = MaskedVisualGenomeDataset 14 | 15 | @classmethod 16 | def config_path(cls): 17 | return "configs/datasets/visual_genome/masked.yaml" 18 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vizwiz/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .builder import VizWizBuilder 3 | from .dataset import VizWizDataset 4 | 5 | 6 | __all__ = ["VizWizBuilder", "VizWizDataset"] 7 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vizwiz/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.vizwiz.dataset import VizWizDataset 4 | from mmf.datasets.builders.vqa2 import VQA2Builder 5 | 6 | 7 | @registry.register_builder("vizwiz") 8 | class VizWizBuilder(VQA2Builder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "vizwiz" 12 | self.set_dataset_class(VizWizDataset) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/vizwiz/defaults.yaml" 17 | 18 | def update_registry_for_model(self, config): 19 | super().update_registry_for_model(config) 20 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqa2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = ["VQA2Builder", "VQA2Dataset"] 3 | 4 | from .builder import VQA2Builder 5 | from .dataset import VQA2Dataset 6 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqa2/masked_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | from mmf.common.registry import registry 9 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 10 | from mmf.datasets.builders.vqa2.masked_dataset import MaskedVQA2Dataset 11 | 12 | 13 | @registry.register_builder("masked_vqa2") 14 | class MaskedVQA2Builder(VQA2Builder): 15 | def __init__(self): 16 | super().__init__() 17 | self.dataset_name = "masked_vqa2" 18 | self.dataset_class = MaskedVQA2Dataset 19 | 20 | @classmethod 21 | def config_path(cls): 22 | return "configs/datasets/vqa2/masked.yaml" 23 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqa2/masked_q_vqa2_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | 9 | from mmf.common.registry import registry 10 | from mmf.datasets.builders.vqa2.builder import VQA2Builder 11 | from mmf.datasets.builders.vqa2.masked_q_vqa2_dataset import MaskedQVQA2Dataset 12 | 13 | 14 | @registry.register_builder("masked_q_vqa2") 15 | class MaskedQVQA2Builder(VQA2Builder): 16 | def __init__(self): 17 | super().__init__() 18 | self.dataset_name = "masked_q_vqa2" 19 | self.dataset_class = MaskedQVQA2Dataset 20 | 21 | @classmethod 22 | def config_path(cls): 23 | return "configs/datasets/vqa2/masked_q.yaml" 24 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqa2/ocr_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import Registry 3 | from mmf.datasets.builders.vizwiz import VizWizBuilder 4 | from mmf.datasets.builders.vqa2.ocr_dataset import VQA2OCRDataset 5 | 6 | 7 | @Registry.register_builder("vqa2_ocr") 8 | class VQA2OCRBuilder(VizWizBuilder): 9 | def __init__(self): 10 | super().__init__() 11 | self.dataset_name = "VQA2_OCR" 12 | self.set_dataset_class(VQA2OCRDataset) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return None 17 | -------------------------------------------------------------------------------- /mmf/datasets/builders/vqacp_v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/mmf/datasets/builders/vqacp_v2/__init__.py -------------------------------------------------------------------------------- /mmf/datasets/builders/vqacp_v2/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.common.registry import registry 3 | from mmf.datasets.builders.vqacp_v2.dataset import VQACPv2Dataset 4 | from mmf.datasets.mmf_dataset_builder import MMFDatasetBuilder 5 | 6 | 7 | @registry.register_builder("vqacp_v2") 8 | class VQACPv2Builder(MMFDatasetBuilder): 9 | def __init__( 10 | self, dataset_name="vqacp_v2", dataset_class=VQACPv2Dataset, *args, **kwargs 11 | ): 12 | super().__init__(dataset_name, dataset_class, *args, **kwargs) 13 | 14 | @classmethod 15 | def config_path(cls): 16 | return "configs/datasets/vqacp_v2/defaults.yaml" 17 | -------------------------------------------------------------------------------- /mmf/datasets/databases/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import mmf.datasets.databases.readers # noqa 3 | 4 | from .annotation_database import AnnotationDatabase 5 | from .features_database import FeaturesDatabase 6 | from .image_database import ImageDatabase 7 | from .scene_graph_database import SceneGraphDatabase 8 | 9 | 10 | __all__ = [ 11 | "AnnotationDatabase", 12 | "FeaturesDatabase", 13 | "ImageDatabase", 14 | "SceneGraphDatabase", 15 | ] 16 | -------------------------------------------------------------------------------- /mmf/datasets/databases/readers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/datasets/databases/scene_graph_database.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.datasets.databases.annotation_database import AnnotationDatabase 3 | 4 | 5 | class SceneGraphDatabase(AnnotationDatabase): 6 | def __init__(self, config, scene_graph_path, *args, **kwargs): 7 | super().__init__(config, scene_graph_path, *args, **kwargs) 8 | self.data_dict = {} 9 | for item in self.data: 10 | self.data_dict[item["image_id"]] = item 11 | 12 | def __getitem__(self, idx): 13 | return self.data_dict[idx] 14 | -------------------------------------------------------------------------------- /mmf/datasets/subset_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from torch.utils.data.dataset import Subset 4 | 5 | 6 | class MMFSubset(Subset): 7 | def __init__(self, dataset, indices): 8 | super().__init__(dataset, indices) 9 | self._dir_representation = dir(self) 10 | 11 | def __getattr__(self, name): 12 | if "_dir_representation" in self.__dict__ and name in self._dir_representation: 13 | return getattr(self, name) 14 | elif "dataset" in self.__dict__ and hasattr(self.dataset, name): 15 | return getattr(self.dataset, name) 16 | else: 17 | raise AttributeError(name) 18 | -------------------------------------------------------------------------------- /mmf/models/albef/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import mmf.models.albef.vit # noqa 3 | -------------------------------------------------------------------------------- /mmf/models/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/models/m4c_captioner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
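# M4C-Captioner is M4C reused for captioning: it keeps the full M4C forward pass and, when remove_unk_in_pred is enabled in the config, pushes the <unk> logit to a large negative value so <unk> never appears in generated captions.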
2 | from mmf.common.registry import registry 3 | from mmf.models.m4c import M4C 4 | 5 | 6 | @registry.register_model("m4c_captioner") 7 | class M4CCaptioner(M4C): 8 | def __init__(self, config): 9 | super().__init__(config) 10 | self.remove_unk_in_pred = self.config.remove_unk_in_pred 11 | 12 | @classmethod 13 | def config_path(cls): 14 | return "configs/models/m4c_captioner/defaults.yaml" 15 | 16 | def _forward_output(self, sample_list, fwd_results): 17 | super()._forward_output(sample_list, fwd_results) 18 | 19 | if self.remove_unk_in_pred: 20 | # avoid outputting <unk> in the generated captions 21 | fwd_results["scores"][..., self.answer_processor.UNK_IDX] = -1e10 22 | 23 | return fwd_results 24 | -------------------------------------------------------------------------------- /mmf/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import mmf.models.transformers.backends # noqa 4 | from mmf.models.transformers.base import ( # noqa 5 | BaseTransformer, 6 | BaseTransformerBackend, 7 | BaseTransformerBackendConfig, 8 | BaseTransformerHead, 9 | BaseTransformerInput, 10 | BaseTransformerModalityConfig, 11 | ) 12 | -------------------------------------------------------------------------------- /mmf/models/transformers/backends/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.utils.env import import_files 4 | 5 | 6 | import_files(__file__, "mmf.models.transformers.backends") 7 | -------------------------------------------------------------------------------- /mmf/models/transformers/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.utils.env import import_files 4 | 5 | 6 | import_files(__file__, "mmf.models.transformers.heads") 7 | -------------------------------------------------------------------------------- /mmf/models/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = ["UniT"] 3 | 4 | from .unit import UniT 5 | -------------------------------------------------------------------------------- /mmf/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import mmf.modules.losses # noqa 3 | import mmf.modules.metrics # noqa 4 | import mmf.modules.optimizers # noqa 5 | import mmf.modules.schedulers # noqa 6 | -------------------------------------------------------------------------------- /mmf/projects: -------------------------------------------------------------------------------- 1 | ../projects -------------------------------------------------------------------------------- /mmf/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | __all__ = ["BaseTrainer"] 3 | 4 | from .base_trainer import BaseTrainer 5 | -------------------------------------------------------------------------------- /mmf/trainers/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | -------------------------------------------------------------------------------- /mmf/trainers/callbacks/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.trainers.callbacks.base import Callback 4 | from mmf.utils.build import build_scheduler 5 | 6 | 7 | class LRSchedulerCallback(Callback): 8 | """Callback which executes an LR scheduler. It is executed after every 9 | batch iteration. 10 | """ 11 | 12 | def __init__(self, config, trainer): 13 | """ 14 | Args: 15 | config(mmf_typings.DictConfig): Config for the callback 16 | trainer(Type[BaseTrainer]): Trainer object 17 | """ 18 | super().__init__(config, trainer) 19 | 20 | self._scheduler = None 21 | if self.training_config.lr_scheduler is True: 22 | self._scheduler = build_scheduler(trainer.optimizer, self.config) 23 | 24 | def on_update_end(self, **kwargs): 25 | if self._scheduler is not None: 26 | self._scheduler.step() 27 | -------------------------------------------------------------------------------- /mmf/trainers/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/trainers/core/profiling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import logging 4 | import threading 5 | from abc import ABC 6 | from typing import Type 7 | 8 | from mmf.utils.timer import Timer 9 | 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class TrainerProfilingMixin(ABC): 15 | profiler: Type[Timer] = Timer() 16 | 17 | def profile(self, text: str) -> None: 18 | if self.training_config.logger_level != "debug": 19 | return 20 | logger.debug( 21 | f"tid={threading.current_thread().ident}, {text}: {self.profiler.get_time_since_start()}" 22 | ) 23 | self.profiler.reset() 24 | -------------------------------------------------------------------------------- /mmf/trainers/lightning_core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/utils/features/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mmf/utils/file_io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
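# Exposes a single module-level iopath PathManager instance for all of MMF's file IO; Facebook-internal handlers are registered onto it only when the fb-internal module is importable.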
2 | 3 | from iopath.common.file_io import PathManager as pm 4 | 5 | 6 | PathManager = pm() 7 | 8 | try: 9 | # [FB only] register internal file IO handlers 10 | from mmf.utils.fb.file_io_handlers import register_handlers 11 | 12 | register_handlers(PathManager) 13 | except ImportError: 14 | pass 15 | -------------------------------------------------------------------------------- /mmf/utils/phoc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from .build_phoc import build_phoc # NoQA 4 | -------------------------------------------------------------------------------- /mmf/utils/phoc/build_phoc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .cphoc import build_phoc as _build_phoc_raw 4 | 5 | 6 | _alphabet = { 7 | "a", 8 | "b", 9 | "c", 10 | "d", 11 | "e", 12 | "f", 13 | "g", 14 | "h", 15 | "i", 16 | "j", 17 | "k", 18 | "l", 19 | "m", 20 | "n", 21 | "o", 22 | "p", 23 | "q", 24 | "r", 25 | "s", 26 | "t", 27 | "u", 28 | "v", 29 | "w", 30 | "x", 31 | "y", 32 | "z", 33 | "0", 34 | "1", 35 | "2", 36 | "3", 37 | "4", 38 | "5", 39 | "6", 40 | "7", 41 | "8", 42 | "9", 43 | } # NoQA 44 | 45 | 46 | def build_phoc(token): 47 | token = token.lower().strip() 48 | token = "".join([c for c in token if c in _alphabet]) 49 | phoc = _build_phoc_raw(token) 50 | phoc = np.array(phoc, dtype=np.float32) 51 | return phoc 52 | -------------------------------------------------------------------------------- /mmf/utils/torchscript.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from typing import Dict, Optional 4 | 5 | from torch import Tensor 6 | 7 | 8 | def getattr_torchscriptable( 9 | dictionary: Dict[str, Tensor], key: str, default: Optional[Tensor] = None 10 | ) -> Optional[Tensor]: 11 | if key in dictionary: 12 | return dictionary[key] 13 | else: 14 | return default 15 | -------------------------------------------------------------------------------- /mmf/utils/transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from torch import Tensor 4 | 5 | 6 | def transform_to_batch_sequence(tensor: Tensor) -> Tensor: 7 | if len(tensor.size()) == 2: 8 | return tensor 9 | else: 10 | assert len(tensor.size()) == 3 11 | return tensor.contiguous().view(-1, tensor.size(-1)) 12 | 13 | 14 | def transform_to_batch_sequence_dim(tensor: Tensor) -> Tensor: 15 | if len(tensor.size()) == 3: 16 | return tensor 17 | else: 18 | assert len(tensor.size()) == 4 19 | return tensor.contiguous().view(-1, tensor.size(-2), tensor.size(-1)) 20 | -------------------------------------------------------------------------------- /mmf/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import sys 4 | 5 | 6 | __version__ = "1.0.0rc12" 7 | 8 | msg = "MMF is only compatible with Python 3.6 and newer." 9 | 10 | 11 | if sys.version_info < (3, 6): 12 | raise ImportError(msg) 13 | -------------------------------------------------------------------------------- /mmf_cli/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | -------------------------------------------------------------------------------- /mmf_cli/predict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 -u 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | import sys 5 | 6 | from mmf_cli.run import run 7 | 8 | 9 | def predict(opts=None): 10 | if opts is None: 11 | sys.argv.extend(["evaluation.predict=true"]) 12 | else: 13 | opts.extend(["evaluation.predict=true"]) 14 | 15 | run(opts, predict=True)  # forward the collected overrides to run() 16 | 17 | 18 | if __name__ == "__main__": 19 | predict() 20 | -------------------------------------------------------------------------------- /mmf_cli/torchx_entryscript.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | """ 3 | Entrypoint script used by TorchX to start the training run in each process 4 | """ 5 | 6 | from mmf_cli.fb_run import fb_scheduler_run 7 | 8 | 9 | if __name__ == "__main__": 10 | fb_scheduler_run() 11 | -------------------------------------------------------------------------------- /projects/ban/README.md: -------------------------------------------------------------------------------- 1 | # BAN 2 | 3 | This repository contains the code for the BAN model. Please cite the following paper if you are using the BAN model from MMF: 4 | 5 | * Kim, J. H., Jun, J., & Zhang, B. T. (2018). *Bilinear attention networks*. In Advances in Neural Information Processing Systems (pp. 1564-1574). ([arXiv](https://arxiv.org/abs/1805.07932)) 6 | ``` 7 | @inproceedings{kim2018bilinear, 8 | title={Bilinear attention networks}, 9 | author={Kim, Jin-Hwa and Jun, Jaehyun and Zhang, Byoung-Tak}, 10 | booktitle={Advances in Neural Information Processing Systems}, 11 | pages={1564--1574}, 12 | year={2018} 13 | } 14 | ``` 15 | 16 | ## Installation 17 | 18 | Follow the installation instructions in the [documentation](https://mmf.readthedocs.io/en/latest/notes/installation.html).
19 | 20 | ## Training 21 | To train the BAN model on the VQA2 dataset, run the following command: 22 | ``` 23 | mmf_run config=projects/ban/configs/vqa2/defaults.yaml run_type=train_val dataset=vqa2 model=ban 24 | ``` 25 | -------------------------------------------------------------------------------- /projects/ban/configs/textvqa/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../../mmf/configs/datasets/textvqa/defaults.yaml 3 | 4 | evaluation: 5 | metrics: 6 | - vqa_accuracy 7 | 8 | training: 9 | early_stop: 10 | criteria: textvqa/vqa_accuracy 11 | minimize: false 12 | -------------------------------------------------------------------------------- /projects/ban/configs/vizwiz/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../../mmf/configs/datasets/vizwiz/defaults.yaml 3 | 4 | evaluation: 5 | metrics: 6 | - vqa_accuracy 7 | 8 | training: 9 | early_stop: 10 | criteria: vizwiz/vqa_accuracy 11 | minimize: false 12 | -------------------------------------------------------------------------------- /projects/ban/configs/vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../../mmf/configs/datasets/vqa2/defaults.yaml 3 | 4 | evaluation: 5 | metrics: 6 | - vqa_accuracy 7 | 8 | training: 9 | early_stop: 10 | criteria: vqa2/vqa_accuracy 11 | minimize: false 12 | -------------------------------------------------------------------------------- /projects/butd/README.md: -------------------------------------------------------------------------------- 1 | # BUTD 2 | 3 | This repository contains the code for a PyTorch implementation of the BUTD model, originally released in this [repo](https://github.com/peteanderson80/bottom-up-attention). Please cite the following paper if you use the BUTD model from MMF: 4 | 5 | * Anderson, P., He, X., Buehler, C., Teney, D., Johnson, M., Gould, S., & Zhang, L. (2018). *Bottom-up and top-down attention for image captioning and visual question answering*. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6077-6086). ([arXiv](https://arxiv.org/abs/1707.07998)) 6 | ``` 7 | @inproceedings{Anderson2017up-down, 8 | author = {Peter Anderson and Xiaodong He and Chris Buehler and Damien Teney and Mark Johnson and Stephen Gould and Lei Zhang}, 9 | title = {Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering}, 10 | booktitle={CVPR}, 11 | year = {2018} 12 | } 13 | ``` 14 | 15 | Please see [https://mmf.sh/docs/projects/butd](https://mmf.sh/docs/projects/butd) for more details on how to use the BUTD model.
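The configs that follow switch BUTD between beam search and nucleus sampling at inference time. As a minimal sketch of driving such a run programmatically (an assumption, not a documented API: it relies on `mmf_cli.run.run()` parsing `sys.argv` when no opts are passed, as `mmf_cli/predict.py` above suggests; the override values are illustrative):

```python
import sys

from mmf_cli.run import run

# Mimic the CLI: place MMF config overrides on sys.argv, then call run().
# beam_search.yaml (below) sets inference.type=beam_search with beam_length 5.
sys.argv = [
    "mmf_run",
    "config=projects/butd/configs/coco/beam_search.yaml",
    "run_type=val",
    "dataset=coco",
    "model=butd",
]
run()
```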
16 | -------------------------------------------------------------------------------- /projects/butd/configs/coco/beam_search.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | butd: 6 | inference: 7 | type: beam_search 8 | params: 9 | beam_length: 5 10 | 11 | training: 12 | batch_size: 1 13 | -------------------------------------------------------------------------------- /projects/butd/configs/coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: Adamax 3 | params: 4 | eps: 1.0e-08 5 | lr: 0.01 6 | weight_decay: 0 7 | 8 | evaluation: 9 | metrics: 10 | - caption_bleu4 11 | 12 | training: 13 | clip_norm_mode: all 14 | clip_gradients: true 15 | lr_ratio: 0.1 16 | lr_scheduler: true 17 | lr_steps: 18 | - 15000 19 | - 25000 20 | - 35000 21 | - 45000 22 | max_grad_l2_norm: 0.25 23 | max_updates: 50000 24 | use_warmup: true 25 | warmup_factor: 0.2 26 | warmup_iterations: 1000 27 | batch_size: 256 28 | num_workers: 7 29 | task_size_proportional_sampling: true 30 | early_stop: 31 | criteria: coco/caption_bleu4 32 | minimize: false 33 | -------------------------------------------------------------------------------- /projects/butd/configs/coco/nucleus_sampling.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | butd: 6 | inference: 7 | type: nucleus_sampling 8 | params: 9 | sum_threshold: 0.8 10 | 11 | training: 12 | batch_size: 1 13 | -------------------------------------------------------------------------------- /projects/butd/configs/conceptual_captions/beam_search.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | butd: 6 | inference: 7 | type: beam_search 8 | params: 9 | beam_length: 5 10 | 11 | training: 12 | batch_size: 1 13 | -------------------------------------------------------------------------------- /projects/butd/configs/conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: Adamax 3 | params: 4 | eps: 1.0e-08 5 | lr: 0.01 6 | weight_decay: 0 7 | 8 | evaluation: 9 | metrics: 10 | - caption_bleu4 11 | 12 | training: 13 | clip_norm_mode: all 14 | clip_gradients: true 15 | lr_ratio: 0.1 16 | lr_scheduler: true 17 | lr_steps: 18 | - 15000 19 | - 25000 20 | - 35000 21 | - 45000 22 | max_grad_l2_norm: 0.25 23 | max_updates: 50000 24 | use_warmup: true 25 | warmup_factor: 0.2 26 | warmup_iterations: 1000 27 | batch_size: 256 28 | num_workers: 7 29 | task_size_proportional_sampling: true 30 | early_stop: 31 | criteria: conceptual_captions/caption_bleu4 32 | minimize: false 33 | -------------------------------------------------------------------------------- /projects/butd/configs/conceptual_captions/nucleus_sampling.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | butd: 6 | inference: 7 | type: nucleus_sampling 8 | params: 9 | sum_threshold: 0.8 10 | 11 | training: 12 | batch_size: 1 13 | -------------------------------------------------------------------------------- /projects/butd/configs/textcaps/beam_search.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 |
dataset_config: 5 | textcaps: 6 | zoo_requirements: 7 | - textvqa.defaults 8 | - textcaps.defaults 9 | annotations: 10 | val: 11 | - textcaps/defaults/annotations/imdb_val_filtered_by_image_id.npy 12 | 13 | model_config: 14 | butd: &butd 15 | inference: 16 | type: beam_search 17 | params: 18 | beam_length: 5 19 | 20 | training: 21 | batch_size: 1 22 | -------------------------------------------------------------------------------- /projects/butd/configs/textcaps/eval_pretrained_coco_model.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./beam_search.yaml 3 | 4 | dataset_config: 5 | textcaps: 6 | processors: 7 | text_processor: 8 | params: 9 | vocab: 10 | vocab_file: textcaps/defaults/extras/vocabs/coco_vocabulary_captioning_thresh5.txt 11 | caption_processor: 12 | params: 13 | vocab: 14 | vocab_file: textcaps/defaults/extras/vocabs/coco_vocabulary_captioning_thresh5.txt 15 | -------------------------------------------------------------------------------- /projects/hateful_memes/configs/concat_bert/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/concat_bert/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/concat_bow/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/concat_bow/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/late_fusion/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/late_fusion/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/mmbt/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../mmbt/configs/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/mmbt/with_features.yaml: -------------------------------------------------------------------------------- 1 | ../../../mmbt/configs/hateful_memes/with_features.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/mmf_transformer/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../mmf_transformer/configs/hateful_memes/defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/hateful_memes/configs/unimodal/bert.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/unimodal/configs/hateful_memes/bert.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/unimodal/image.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/unimodal/configs/hateful_memes/image.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/unimodal/text.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/unimodal/configs/hateful_memes/text.yaml -------------------------------------------------------------------------------- 
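The hateful_memes project configs above and below are mostly one-line pointers into shared model configs. As a hedged example of selecting one of them from the command line, following the invocation format shown in the BAN README (the `run_type` value here is illustrative):

```
mmf_run config=projects/hateful_memes/configs/mmbt/defaults.yaml run_type=train_val dataset=hateful_memes model=mmbt
```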
/projects/hateful_memes/configs/unimodal/with_features.yaml: -------------------------------------------------------------------------------- 1 | ../../../others/unimodal/configs/hateful_memes/with_features.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/vilbert/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../vilbert/configs/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/vilbert/direct.yaml: -------------------------------------------------------------------------------- 1 | ../../../vilbert/configs/hateful_memes/direct.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/vilbert/from_cc.yaml: -------------------------------------------------------------------------------- 1 | ../../../vilbert/configs/hateful_memes/from_cc.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/visual_bert/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../visual_bert/configs/hateful_memes/defaults.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/visual_bert/direct.yaml: -------------------------------------------------------------------------------- 1 | ../../../visual_bert/configs/hateful_memes/direct.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/configs/visual_bert/from_coco.yaml: -------------------------------------------------------------------------------- 1 | ../../../visual_bert/configs/hateful_memes/from_coco.yaml -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/attack_vectors.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./multilabel.yaml 3 | - configs/datasets/hateful_memes/fine_grained/attack_vectors.yaml 4 | 5 | model_config: 6 | visual_bert: 7 | num_labels: 9 8 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../configs/visual_bert/defaults.yaml 3 | - configs/datasets/hateful_memes/fine_grained/with_features.yaml 4 | 5 | training: 6 | find_unused_parameters: false 7 | batch_size: 128 8 | max_updates: 10000 9 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/hateful_pc_attack.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./multilabel.yaml 3 | - configs/datasets/hateful_memes/fine_grained/hateful_pc_attack.yaml 4 | 5 | model_config: 6 | visual_bert: 7 | num_labels: 17 8 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/multilabel.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | visual_bert: 6 | training_head_type: classification 7 | num_labels: 9 8 | losses: 9 | - type: 
logit_bce 10 | 11 | evaluation: 12 | metrics: 13 | - accuracy 14 | - multilabel_macro_f1 15 | - multilabel_micro_f1 16 | 17 | training: 18 | find_unused_parameters: false 19 | early_stop: 20 | criteria: hateful_memes/multilabel_micro_f1 21 | minimize: false 22 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/pc_attack.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./multilabel.yaml 3 | - configs/datasets/hateful_memes/fine_grained/pc_attack.yaml 4 | 5 | model_config: 6 | visual_bert: 7 | num_labels: 15 8 | -------------------------------------------------------------------------------- /projects/hateful_memes/fine_grained/configs/visual_bert/protected_groups.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./multilabel.yaml 3 | - configs/datasets/hateful_memes/fine_grained/protected_groups.yaml 4 | 5 | model_config: 6 | visual_bert: 7 | num_labels: 7 8 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/conceptnet_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/cn_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_cn.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_cn.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_cn.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/dbpedia_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/db_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_db.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_db.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_db.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/haspart_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/hp_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_hp.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_hp.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_hp.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/okvqav10.yaml: -------------------------------------------------------------------------------- 1 | 
includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 8 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10.pth.tar 9 | okvqa_v_mode: "v1.0-121" 10 | old_graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab.pth.tar 11 | ans_translation_file: okvqa/defaults/annotations/annotations/ans_vocab_tx.pth.tar 12 | num_labels: 2253 13 | num_labels: 2253 14 | dataset_config: 15 | okvqa: 16 | processors: 17 | answer_processor: 18 | params: 19 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 20 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10.pth.tar 21 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/okvqav10_fromfullpretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 8 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10_fp.pth.tar 9 | okvqa_v_mode: "v1.0-121-mc" 10 | old_graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab.pth.tar 11 | ans_translation_file: okvqa/defaults/annotations/annotations/ans_vocab_tx.pth.tar 12 | num_labels: 2253 13 | num_labels: 2253 14 | dataset_config: 15 | okvqa: 16 | processors: 17 | answer_processor: 18 | params: 19 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 20 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_v10_fp.pth.tar 21 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/randomgraph.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/random_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_random.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_random.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_random.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_cnonly.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./conceptnet_only.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - 
okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_dbonly.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./dbpedia_only.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_hponly.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./haspart_only.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_okvqav10.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./okvqav10.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval_v10.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_okvqav10_fromfullpretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./okvqav10_fromfullpretrain.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval_v10.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_random.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./randomgraph.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/train_val_vgonly.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./visualgenome_only.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/krisp/okvqa/visualgenome_only.yaml: 
-------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | krisp: 6 | graph_module: 7 | kg_path: okvqa/defaults/annotations/annotations/graphs/vg_graph.pth.tar 8 | node2vec_filename: okvqa/defaults/annotations/annotations/node2vec/node2vec_vg.pkl 9 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_vg.pth.tar 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | answer_processor: 14 | params: 15 | graph_vocab_file: okvqa/defaults/annotations/annotations/graph_vocab/graph_vocab_vg.pth.tar 16 | -------------------------------------------------------------------------------- /projects/krisp/configs/visual_bert/masked_coco/okvqa_safe.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: adam_w 3 | params: 4 | lr: 5e-5 5 | eps: 1e-8 6 | 7 | scheduler: 8 | type: warmup_cosine 9 | params: 10 | num_warmup_steps: 2000 11 | num_training_steps: 88000 12 | 13 | dataset_config: 14 | masked_coco: 15 | return_features_info: true 16 | features: 17 | train: 18 | - okvqa/defaults/features/features_fc6/COCO_trainval2014.lmdb 19 | val: 20 | - okvqa/defaults/features/features_fc6/COCO_trainval2014.lmdb 21 | test: 22 | - okvqa/defaults/features/features_fc6/COCO_trainval2014.lmdb 23 | 24 | training: 25 | find_unused_parameters: true 26 | batch_size: 56 27 | lr_scheduler: true 28 | # Don't forget to update schedule_attributes if you update this 29 | max_updates: 88000 30 | -------------------------------------------------------------------------------- /projects/krisp/configs/visual_bert/okvqa/defaults_v10.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | visual_bert: 6 | num_labels: 2253 7 | 8 | dataset_config: 9 | okvqa: 10 | processors: 11 | answer_processor: 12 | params: 13 | vocab_file: okvqa/defaults/annotations/annotations/answer_vocab_v10_count10.txt 14 | -------------------------------------------------------------------------------- /projects/krisp/configs/visual_bert/okvqa/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/configs/visual_bert/okvqa/train_val_okvqav10.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults_v10.yaml 3 | 4 | dataset_config: 5 | okvqa: 6 | annotations: 7 | train: 8 | - okvqa/defaults/annotations/annotations/imdb_trainval_v10.npy 9 | val: 10 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 11 | test: 12 | - okvqa/defaults/annotations/annotations/imdb_test_v10.npy 13 | -------------------------------------------------------------------------------- /projects/krisp/requirements.txt: -------------------------------------------------------------------------------- 1 | networkx 2 | torch_geometric 3 | gensim 4 | -------------------------------------------------------------------------------- /projects/lorra/configs/vqa2/train_val.yaml:
-------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | # Use soft copy 5 | dataset_config: 6 | vqa2_train_val: 7 | use_ocr: true 8 | processors: 9 | context_processor: 10 | type: fasttext 11 | params: 12 | download_initially: true 13 | max_length: 50 14 | model_file: wiki.en.bin 15 | answer_processor: 16 | type: soft_copy_answer 17 | params: 18 | vocab_file: vqa2/defaults/extras/vocabs/answers_vqa.txt 19 | preprocessor: 20 | type: simple_word 21 | params: {} 22 | context_preprocessor: 23 | type: simple_word 24 | params: {} 25 | max_length: 50 26 | num_answers: 10 27 | -------------------------------------------------------------------------------- /projects/lorra/configs/vqa2/train_val_resnet_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2_train_val: 6 | use_images: false 7 | use_features: true 8 | zoo_requirements: 9 | - coco.resnet152 10 | - vqa2.defaults 11 | features: 12 | train: 13 | - coco/resnet152/features/trainval2014.lmdb 14 | - coco/resnet152/features/trainval2014.lmdb 15 | val: 16 | - coco/resnet152/features/trainval2014.lmdb 17 | test: 18 | - coco/resnet152/features/test2015.lmdb 19 | annotations: 20 | train: 21 | - vqa2/defaults/annotations/imdb_train2014.npy 22 | - vqa2/defaults/annotations/imdb_valminusminival2014.npy 23 | val: 24 | - vqa2/defaults/annotations/imdb_minival2014.npy 25 | 26 | model_config: 27 | lorra: 28 | image_feature_encodings: 29 | - type: default 30 | params: {} 31 | -------------------------------------------------------------------------------- /projects/lxmert/configs/coco/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../defaults.yaml 3 | - ./masked.yaml 4 | -------------------------------------------------------------------------------- /projects/lxmert/configs/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/lxmert/defaults.yaml 3 | 4 | optimizer: 5 | type: adam_w 6 | params: 7 | lr: 1e-4 8 | eps: 1e-8 9 | 10 | training: 11 | seed: 9595 12 | batch_size: 4 13 | lr_scheduler: false 14 | find_unused_parameters: true 15 | use_warmup: true 16 | warmup_factor: 0.05 17 | warmup_iterations: 1000 18 | max_epochs: 20 19 | max_updates: null 20 | pin_memory: true 21 | 22 | 23 | evaluation: 24 | metrics: 25 | - vqa_accuracy 26 | -------------------------------------------------------------------------------- /projects/lxmert/configs/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - coco/masked.yaml 4 | - gqa/masked.yaml 5 | - visual_genome/masked.yaml 6 | - vqa2/masked.yaml 7 | - configs/models/lxmert/defaults.yaml 8 | -------------------------------------------------------------------------------- /projects/lxmert/configs/visual_genome/masked.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_visual_genome: 3 | use_features: true 4 | add_answer: true 5 | max_features: 36 6 | features: 7 | train: 8 | - visual_genome/detectron_fix_100/fc6/,visual_genome/resnet152/ 9 | - visual_genome/detectron_fix_100/fc6/,visual_genome/resnet152/ 10 | annotations: 11 | train: 12 | - imdb/visual_genome/vg_question_answers.jsonl 13 | - imdb/visual_genome/vg_question_answers_placeholder.jsonl 14 | 
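The lxmert `pretrain.yaml` that follows pairs a `warmup_linear` scheduler (1000 warmup steps) with `max_updates: 11000`. As a rough sketch of what such a schedule computes (assuming the usual linear warmup followed by linear decay; the exact MMF implementation may differ):

```python
def warmup_linear_multiplier(step: int, warmup: int, total: int) -> float:
    """LR multiplier: ramp 0 -> 1 over `warmup` steps, then decay to 0 at `total`."""
    if step < warmup:
        return step / max(1, warmup)
    return max(0.0, (total - step) / max(1, total - warmup))


# With the values from the config below, this prints 0.0, 0.5, 1.0, 0.5, 0.0:
for step in (0, 500, 1000, 6000, 11000):
    print(step, warmup_linear_multiplier(step, warmup=1000, total=11000))
```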
-------------------------------------------------------------------------------- /projects/lxmert/configs/visual_genome/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./masked.yaml 3 | 4 | optimizer: 5 | type: adam_w 6 | params: 7 | lr: 1e-4 8 | eps: 1e-8 9 | 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 1000 14 | num_training_steps: ${training.max_updates} 15 | 16 | training: 17 | batch_size: 480 18 | lr_scheduler: true 19 | # Don't forget to update schedule_attributes if you update this 20 | max_updates: 11000 21 | -------------------------------------------------------------------------------- /projects/lxmert/configs/vqa2/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../defaults.yaml 3 | - ./masked.yaml 4 | -------------------------------------------------------------------------------- /projects/m4c/README.md: -------------------------------------------------------------------------------- 1 | # Iterative Answer Prediction with Pointer-Augmented Multimodal Transformers for TextVQA 2 | 3 | This repository contains the code for the M4C model from the following paper, released as part of MMF: 4 | 5 | * R. Hu, A. Singh, T. Darrell, M. Rohrbach, *Iterative Answer Prediction with Pointer-Augmented Multimodal Transformers for TextVQA*. In CVPR, 2020 ([PDF](https://arxiv.org/pdf/1911.06258.pdf)) 6 | ``` 7 | @inproceedings{hu2020iterative, 8 | title={Iterative Answer Prediction with Pointer-Augmented Multimodal Transformers for TextVQA}, 9 | author={Hu, Ronghang and Singh, Amanpreet and Darrell, Trevor and Rohrbach, Marcus}, 10 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 11 | year={2020} 12 | } 13 | ``` 14 | 15 | Please see [https://mmf.sh/docs/projects/m4c](https://mmf.sh/docs/projects/m4c) for more details on how to use the M4C model.
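As with the other projects, training follows the standard `mmf_run` invocation; a hedged example for TextVQA (the config path is the `defaults.yaml` referenced by the configs below, and the `run_type` value is illustrative):

```
mmf_run config=projects/m4c/configs/textvqa/defaults.yaml run_type=train_val dataset=textvqa model=m4c
```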
16 | -------------------------------------------------------------------------------- /projects/m4c/configs/textvqa/joint_with_stvqa.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | textvqa: 6 | use_images: false 7 | use_features: true 8 | zoo_requirements: 9 | - textvqa.defaults 10 | - textvqa.ocr_en 11 | - stvqa.defaults 12 | - stvqa.ocr_en 13 | features: 14 | train: 15 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 16 | - stvqa/defaults/features/detectron.lmdb,stvqa/ocr_en/features/ocr_en_frcn_features.lmdb 17 | annotations: 18 | train: 19 | - textvqa/defaults/annotations/imdb_train_ocr_en.npy 20 | - stvqa/defaults/annotations/imdb_subtrain.npy 21 | -------------------------------------------------------------------------------- /projects/m4c/configs/textvqa/ocr_ml.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | textvqa: 6 | zoo_requirements: 7 | - textvqa.defaults 8 | - textvqa.ocr_ml 9 | features: 10 | train: 11 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_ml/features/ocr_ml_frcn_features.lmdb 12 | val: 13 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_ml/features/ocr_ml_frcn_features.lmdb 14 | test: 15 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_ml/features/ocr_ml_frcn_features.lmdb 16 | 17 | annotations: 18 | train: 19 | - textvqa/defaults/annotations/imdb_train_ocr_ml.npy 20 | val: 21 | - textvqa/defaults/annotations/imdb_val_ocr_ml.npy 22 | test: 23 | - textvqa/defaults/annotations/imdb_test_ocr_ml.npy 24 | -------------------------------------------------------------------------------- /projects/m4c/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /projects/m4c_captioner/README.md: -------------------------------------------------------------------------------- 1 | # TextCaps: a Dataset for Image Captioning with Reading Comprehension 2 | 3 | This repository contains the code for the M4C-Captioner model from the following paper, released as part of MMF: 4 | 5 | * O. Sidorov, R. Hu, M. Rohrbach, A. Singh, *TextCaps: a Dataset for Image Captioning with Reading Comprehension*. In ECCV, 2020 ([PDF](https://arxiv.org/pdf/2003.12462.pdf)) 6 | ``` 7 | @inproceedings{sidorov2019textcaps, 8 | title={TextCaps: a Dataset for Image Captioning with Reading Comprehension}, 9 | author={Sidorov, Oleksii and Hu, Ronghang and Rohrbach, Marcus and Singh, Amanpreet}, 10 | booktitle={European Conference on Computer Vision}, 11 | year={2020} 12 | } 13 | ``` 14 | 15 | Please see [https://mmf.sh/docs/projects/m4c_captioner](https://mmf.sh/docs/projects/m4c_captioner) for more details on how to use the M4C-Captioner model.
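Caption prediction can reuse the `predict` entrypoint shown earlier in `mmf_cli/predict.py`, which appends `evaluation.predict=true` and delegates to `run()`. A minimal sketch, assuming the same `sys.argv` override format as the CLI (the config path and values are illustrative):

```python
import sys

from mmf_cli.predict import predict

# predict() extends sys.argv with evaluation.predict=true, then calls run().
sys.argv = [
    "mmf_predict",
    "config=projects/m4c_captioner/configs/m4c_captioner/textcaps/defaults.yaml",
    "run_type=test",
    "dataset=textcaps",
    "model=m4c_captioner",
]
predict()
```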
16 | -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/butd/textcaps: -------------------------------------------------------------------------------- 1 | ../../../butd/configs/textcaps -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/m4c_captioner/coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/coco/ocr_en.yaml 3 | optimizer: 4 | params: 5 | eps: 1.0e-08 6 | lr: 1e-4 7 | weight_decay: 0 8 | type: Adam 9 | 10 | evaluation: 11 | metrics: 12 | - textcaps_bleu4 13 | 14 | training: 15 | clip_norm_mode: all 16 | clip_gradients: true 17 | max_grad_l2_norm: 0.25 18 | lr_scheduler: true 19 | lr_steps: 20 | - 14000 21 | - 19000 22 | lr_ratio: 0.1 23 | use_warmup: true 24 | warmup_factor: 0.2 25 | warmup_iterations: 1000 26 | max_updates: 24000 27 | batch_size: 128 28 | num_workers: 8 29 | early_stop: 30 | criteria: coco/textcaps_bleu4 31 | minimize: false 32 | -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/m4c_captioner/coco/eval_on_textcaps.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | textcaps: 6 | zoo_requirements: 7 | - textvqa.defaults 8 | - textvqa.ocr_en 9 | - textcaps.defaults 10 | use_images: false 11 | use_features: true 12 | features: 13 | val: 14 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 15 | test: 16 | - textvqa/defaults/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 17 | annotations: 18 | val: 19 | - textcaps/defaults/annotations/imdb_val_filtered_by_image_id.npy # only one sample per image_id 20 | test: 21 | - textcaps/defaults/annotations/imdb_test_filtered_by_image_id.npy # only one sample per image_id 22 | -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/m4c_captioner/textcaps/with_caffe2_feat.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | textcaps: 6 | zoo_requirements: 7 | - textvqa.caffe2 8 | - textvqa.ocr_en 9 | - textcaps.defaults 10 | features: 11 | train: 12 | - textvqa/caffe2/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 13 | val: 14 | - textvqa/caffe2/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 15 | test: 16 | - textvqa/caffe2/features/open_images/detectron.lmdb,textvqa/ocr_en/features/ocr_en_frcn_features.lmdb 17 | -------------------------------------------------------------------------------- /projects/m4c_captioner/configs/m4c_captioner/textcaps/without_ocr.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | dataset_config: 4 | textcaps: 5 | use_ocr: false # remove all the OCRs from each image 6 | -------------------------------------------------------------------------------- /projects/m4c_captioner/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | -------------------------------------------------------------------------------- /projects/mmbt/configs/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmbt/classification.yaml 3 | - configs/datasets/hateful_memes/bert.yaml 4 | 5 | scheduler: 6 | type: warmup_linear 7 | params: 8 | num_warmup_steps: 2000 9 | num_training_steps: ${training.max_updates} 10 | 11 | optimizer: 12 | type: adam_w 13 | params: 14 | lr: 1e-5 15 | eps: 1e-8 16 | 17 | evaluation: 18 | metrics: 19 | - accuracy 20 | - binary_f1 21 | - roc_auc 22 | 23 | training: 24 | batch_size: 32 25 | lr_scheduler: true 26 | max_updates: 22000 27 | early_stop: 28 | criteria: hateful_memes/roc_auc 29 | minimize: false 30 | 31 | checkpoint: 32 | pretrained_state_mapping: 33 | bert: bert 34 | -------------------------------------------------------------------------------- /projects/mmbt/configs/hateful_memes/hateful_with_refiner.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmbt/classification.yaml 3 | - configs/datasets/hateful_memes/bert.yaml 4 | 5 | scheduler: 6 | type: warmup_linear 7 | params: 8 | num_warmup_steps: 2000 9 | num_training_steps: ${training.max_updates} 10 | 11 | optimizer: 12 | type: adam_w 13 | params: 14 | lr: 1e-5 15 | eps: 1e-8 16 | 17 | evaluation: 18 | metrics: 19 | - accuracy 20 | - binary_f1 21 | - roc_auc 22 | 23 | training: 24 | batch_size: 32 25 | lr_scheduler: true 26 | max_updates: 22000 27 | early_stop: 28 | criteria: hateful_memes/roc_auc 29 | minimize: false 30 | 31 | checkpoint: 32 | pretrained_state_mapping: 33 | bert: bert 34 | -------------------------------------------------------------------------------- /projects/mmbt/configs/hateful_memes/with_features.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - configs/models/mmbt/with_features.yaml 4 | - configs/datasets/hateful_memes/with_features.yaml 5 | 6 | optimizer: 7 | type: adam_w 8 | params: 9 | lr: 5e-5 10 | eps: 1e-8 11 | -------------------------------------------------------------------------------- /projects/mmbt/configs/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmbt/pretrain.yaml 3 | - configs/models/mmbt/with_features.yaml 4 | 5 | scheduler: 6 | type: warmup_linear 7 | params: 8 | num_warmup_steps: 2000 9 | num_training_steps: ${training.max_updates} 10 | 11 | optimizer: 12 | type: adam_w 13 | params: 14 | lr: 5e-5 15 | eps: 1e-8 16 | 17 | training: 18 | batch_size: 128 19 | lr_scheduler: true 20 | max_updates: 22000 21 | 22 | checkpoint: 23 | pretrained_state_mapping: 24 | bert: bert 25 | -------------------------------------------------------------------------------- /projects/mmbt/configs/mmimdb/with_features.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - configs/models/mmbt/with_features.yaml 4 | - configs/datasets/mmimdb/with_features.yaml 5 | -------------------------------------------------------------------------------- /projects/mmbt/configs/okvqa/with_images.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmbt/classification.yaml 3 | 4 | scheduler: 5 | type: warmup_linear 6 | params: 7 | num_warmup_steps: 2000 8 | 
num_training_steps: ${training.max_updates} 9 | 10 | dataset_config: 11 | okvqa: 12 | processors: 13 | text_processor: 14 | type: bert_tokenizer 15 | params: 16 | tokenizer_config: 17 | type: bert-base-uncased 18 | params: 19 | do_lower_case: true 20 | mask_probability: 0 21 | max_seq_length: 128 22 | 23 | model_config: 24 | mmbt: 25 | losses: 26 | - logit_bce 27 | num_labels: 2253 28 | 29 | optimizer: 30 | type: adam_w 31 | params: 32 | lr: 1e-5 33 | eps: 1e-8 34 | 35 | evaluation: 36 | metrics: 37 | - vqa_accuracy 38 | 39 | training: 40 | batch_size: 32 41 | lr_scheduler: true 42 | max_updates: 22000 43 | early_stop: 44 | criteria: okvqa/vqa_accuracy 45 | minimize: false 46 | 47 | checkpoint: 48 | pretrained_state_mapping: 49 | bert: bert 50 | -------------------------------------------------------------------------------- /projects/mmbt/configs/vqa2/with_raw_images.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - ../../../../mmf/configs/datasets/vqa2/with_raw_images.yaml 4 | -------------------------------------------------------------------------------- /projects/mmf_transformer/configs/charades/direct.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/models/mmf_transformer/with_audio_video.yaml 3 | 4 | model_config: 5 | mmf_transformer: 6 | heads: 7 | - type: mlp 8 | num_labels: 157 9 | 10 | optimizer: 11 | type: adam_w 12 | params: 13 | lr: 5e-5 14 | eps: 1e-8 15 | 16 | scheduler: 17 | type: warmup_cosine 18 | params: 19 | num_warmup_steps: 2000 20 | num_training_steps: 60000 21 | 22 | evaluation: 23 | metrics: 24 | - multilabel_micro_f1 25 | 26 | training: 27 | batch_size: 8 28 | lr_scheduler: true 29 | # Don't forget to update schedule_attributes if you update this 30 | max_updates: 60000 31 | find_unused_parameters: true 32 | early_stop: 33 | criteria: charades/multilabel_micro_f1 34 | minimize: false 35 | -------------------------------------------------------------------------------- /projects/mmf_transformer/configs/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/hateful_memes/bert.yaml 3 | 4 | model_config: 5 | mmf_transformer: 6 | training_head_type: classification 7 | num_labels: 2 8 | losses: 9 | - cross_entropy 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 2000 15 | num_training_steps: ${training.max_updates} 16 | 17 | optimizer: 18 | type: adam_w 19 | params: 20 | lr: 1e-5 21 | eps: 1e-8 22 | 23 | evaluation: 24 | metrics: 25 | - accuracy 26 | - binary_f1 27 | - roc_auc 28 | 29 | training: 30 | batch_size: 32 31 | lr_scheduler: true 32 | max_updates: 22000 33 | early_stop: 34 | criteria: hateful_memes/roc_auc 35 | minimize: false 36 | 37 | checkpoint: 38 | pretrained_state_mapping: 39 | pooler: pooler 40 | backend.transformer: backend.transformer 41 | backend.embeddings: backend.embeddings 42 | -------------------------------------------------------------------------------- /projects/mmf_transformer/configs/masked_coco/pretrain_itm.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | model_config: 5 | mmf_transformer: 6 | heads: 7 | - type: itm 8 | freeze: false 9 | lr_multiplier: 1.0 10 | # default for bert base 11 | hidden_size: 768 12 | 13 | dataset_config: 14 | masked_coco: 15 | return_features_info: true 16 | 
false_caption: true 17 | false_caption_probability: 0.1 18 | -------------------------------------------------------------------------------- /projects/mmf_transformer/localized_narratives/masked.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/localized_narratives/masked.yaml 3 | - configs/models/mmf_transformer/pretrain.yaml 4 | 5 | optimizer: 6 | type: adam_w 7 | params: 8 | lr: 5e-5 9 | eps: 1e-8 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 1000 14 | num_training_steps: 11000 15 | training: 16 | batch_size: 2 17 | lr_scheduler: true 18 | # Don't forget to update schedule_attributes if you update this 19 | max_updates: 11000 20 | -------------------------------------------------------------------------------- /projects/others/cnn_lstm/clevr/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: Adamax 3 | params: 4 | eps: 1.0e-08 5 | lr: 0.01 6 | weight_decay: 0 7 | 8 | evaluation: 9 | metrics: 10 | - accuracy 11 | 12 | training: 13 | batch_size: 128 14 | snapshot_interval: 6000 15 | early_stop: 16 | criteria: clevr/accuracy 17 | minimize: false 18 | -------------------------------------------------------------------------------- /projects/others/cnn_lstm/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | cnn_lstm: 3 | losses: 4 | - type: cross_entropy 5 | classifier: 6 | type: mlp 7 | params: 8 | in_dim: 190 9 | out_dim: 2 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 2000 15 | num_training_steps: ${training.max_updates} 16 | 17 | optimizer: 18 | type: adam_w 19 | params: 20 | lr: 5e-5 21 | eps: 1e-8 22 | 23 | evaluation: 24 | metrics: 25 | - accuracy 26 | - binary_f1 27 | - roc_auc 28 | 29 | training: 30 | batch_size: 480 31 | lr_scheduler: true 32 | max_updates: 60000 33 | early_stop: 34 | criteria: hateful_memes/roc_auc 35 | minimize: false 36 | -------------------------------------------------------------------------------- /projects/others/concat_bert/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/hateful_memes/bert.yaml 3 | 4 | model_config: 5 | concat_bert: 6 | classifier: 7 | type: mlp 8 | params: 9 | num_layers: 2 10 | losses: 11 | - type: cross_entropy 12 | 13 | scheduler: 14 | type: warmup_linear 15 | params: 16 | num_warmup_steps: 2000 17 | num_training_steps: ${training.max_updates} 18 | 19 | optimizer: 20 | type: adam_w 21 | params: 22 | lr: 1e-5 23 | eps: 1e-8 24 | 25 | evaluation: 26 | metrics: 27 | - accuracy 28 | - binary_f1 29 | - roc_auc 30 | 31 | training: 32 | batch_size: 64 33 | lr_scheduler: true 34 | max_updates: 22000 35 | early_stop: 36 | criteria: hateful_memes/roc_auc 37 | minimize: false 38 | 39 | checkpoint: 40 | pretrained_state_mapping: 41 | base: base 42 | -------------------------------------------------------------------------------- /projects/others/concat_bow/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | concat_bow: 3 | classifier: 4 | type: mlp 5 | params: 6 | num_layers: 2 7 | losses: 8 | - type: cross_entropy 9 | 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 2000 14 | num_training_steps: ${training.max_updates} 15 | 16 | optimizer: 17 | type: 
adam_w 18 | params: 19 | lr: 5e-5 20 | eps: 1e-8 21 | 22 | evaluation: 23 | metrics: 24 | - accuracy 25 | - binary_f1 26 | - roc_auc 27 | 28 | training: 29 | batch_size: 32 30 | lr_scheduler: true 31 | max_updates: 22000 32 | early_stop: 33 | criteria: hateful_memes/roc_auc 34 | minimize: false 35 | 36 | checkpoint: 37 | pretrained_state_mapping: 38 | base: base 39 | -------------------------------------------------------------------------------- /projects/others/late_fusion/hateful_memes/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/hateful_memes/bert.yaml 3 | 4 | model_config: 5 | late_fusion: 6 | modal_classifier: 7 | type: mlp 8 | params: 9 | num_layers: 2 10 | text_classifier: 11 | type: mlp 12 | params: 13 | num_layers: 2 14 | losses: 15 | - type: cross_entropy 16 | 17 | scheduler: 18 | type: warmup_linear 19 | params: 20 | num_warmup_steps: 2000 21 | num_training_steps: ${training.max_updates} 22 | 23 | optimizer: 24 | type: adam_w 25 | params: 26 | lr: 5e-5 27 | eps: 1e-8 28 | 29 | evaluation: 30 | metrics: 31 | - accuracy 32 | - binary_f1 33 | - roc_auc 34 | 35 | training: 36 | batch_size: 64 37 | lr_scheduler: true 38 | max_updates: 22000 39 | early_stop: 40 | criteria: hateful_memes/roc_auc 41 | minimize: false 42 | 43 | checkpoint: 44 | pretrained_state_mapping: 45 | base: base 46 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_coco: 3 | return_features_info: true 4 | 5 | optimizer: 6 | type: adam_w 7 | params: 8 | lr: 5e-5 9 | eps: 1e-8 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 1000 15 | num_training_steps: 11000 16 | 17 | training: 18 | batch_size: 480 19 | lr_scheduler: true 20 | # Don't forget to update schedule_attributes if you update this 21 | max_updates: 11000 22 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_coco/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_coco/pretrain_joint_vqa2.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - ../masked_vqa2/defaults.yaml 4 | 5 | model_config: 6 | mmf_bert: 7 | training_head_type: pretraining,vqa 8 | 9 | scheduler: 10 | type: warmup_linear 11 | params: 12 | num_warmup_steps: 3000 13 | num_training_steps: 33000 14 | 15 | training: 16 | max_updates: 34000 17 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | return_features_info: true 7 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_conceptual_captions/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 
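Many of the configs in this section tie `num_training_steps` to `${training.max_updates}`. MMF configs are built on OmegaConf, so this is plain OmegaConf interpolation; a small self-contained sketch of the mechanism (values are illustrative):

```python
from omegaconf import OmegaConf

# The scheduler resolves training.max_updates through interpolation, so
# updating max_updates in one place keeps both values in sync.
cfg = OmegaConf.create(
    """
training:
  max_updates: 22000
scheduler:
  type: warmup_linear
  params:
    num_warmup_steps: 2000
    num_training_steps: ${training.max_updates}
"""
)
print(cfg.scheduler.params.num_training_steps)  # 22000
```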
-------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | annotations: 7 | train: 8 | - vqa2/defaults/annotations/imdb_train2014_len_coco.npy 9 | return_features_info: true 10 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/masked_vqa2/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/others/mmf_bert/configs/visual_entailment/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | mmf_bert: 3 | training_head_type: visual_entailment 4 | losses: 5 | - type: cross_entropy 6 | 7 | dataset_config: 8 | visual_entailment: 9 | return_features_info: true 10 | processors: 11 | text_processor: 12 | type: bert_tokenizer 13 | params: 14 | tokenizer_config: 15 | type: bert-base-uncased 16 | params: 17 | do_lower_case: true 18 | mask_probability: 0 19 | max_seq_length: 128 20 | 21 | optimizer: 22 | type: adam_w 23 | params: 24 | lr: 5e-5 25 | eps: 1e-8 26 | 27 | scheduler: 28 | type: warmup_linear 29 | params: 30 | num_warmup_steps: 6000 31 | num_training_steps: 60000 32 | 33 | evaluation: 34 | metrics: 35 | - accuracy 36 | 37 | training: 38 | early_stop: 39 | criteria: visual_entailment/accuracy 40 | minimize: false 41 | -------------------------------------------------------------------------------- /projects/others/unimodal/configs/hateful_memes/bert.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./text.yaml 3 | - configs/datasets/hateful_memes/bert.yaml 4 | - configs/models/unimodal/bert.yaml 5 | 6 | model_config: 7 | unimodal_text: 8 | classifier: 9 | type: mlp 10 | params: 11 | in_dim: 768 12 | num_layers: 2 13 | 14 | training: 15 | batch_size: 128 16 | -------------------------------------------------------------------------------- /projects/others/unimodal/configs/hateful_memes/image.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | unimodal_image: 3 | classifier: 4 | type: mlp 5 | params: 6 | num_layers: 2 7 | losses: 8 | - type: cross_entropy 9 | 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 2000 14 | num_training_steps: ${training.max_updates} 15 | 16 | optimizer: 17 | type: adam_w 18 | params: 19 | lr: 1e-5 20 | eps: 1e-8 21 | 22 | evaluation: 23 | metrics: 24 | - accuracy 25 | - binary_f1 26 | - roc_auc 27 | 28 | training: 29 | batch_size: 32 30 | lr_scheduler: true 31 | max_updates: 22000 32 | early_stop: 33 | criteria: hateful_memes/roc_auc 34 | minimize: false 35 | 36 | checkpoint: 37 | pretrained_state_mapping: 38 | base: base 39 | -------------------------------------------------------------------------------- /projects/others/unimodal/configs/hateful_memes/text.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | unimodal_text: 3 | classifier: 4 | type: mlp 5 | params: 6 | num_layers: 2 7 | losses: 8 | - type: cross_entropy 9 | 10 | scheduler: 11 | type: warmup_linear 12 | params: 13 | num_warmup_steps: 2000 14 | 
num_training_steps: ${training.max_updates} 15 | 16 | optimizer: 17 | type: adam_w 18 | params: 19 | lr: 5e-5 20 | eps: 1e-8 21 | 22 | evaluation: 23 | metrics: 24 | - accuracy 25 | - binary_f1 26 | - roc_auc 27 | 28 | training: 29 | batch_size: 32 30 | lr_scheduler: true 31 | max_updates: 22000 32 | early_stop: 33 | criteria: hateful_memes/roc_auc 34 | minimize: false 35 | 36 | checkpoint: 37 | pretrained_state_mapping: 38 | base: base 39 | -------------------------------------------------------------------------------- /projects/others/unimodal/configs/hateful_memes/with_features.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./image.yaml 3 | - configs/datasets/hateful_memes/with_features.yaml 4 | - configs/models/unimodal/with_features.yaml 5 | 6 | 7 | optimizer: 8 | type: adam_w 9 | params: 10 | lr: 5e-5 11 | eps: 1e-8 12 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_coco/defaults.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_coco/fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | annotations: 7 | train: 8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_coco/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_coco/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_coco/ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | annotations: 7 | train: 8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_conceptual_captions/defaults.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_conceptual_captions/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/full_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | annotations: 10 | train: 11 | - 
cc/coco_generated/annotations/train_all.npy 12 | val: 13 | - cc/coco_generated/annotations/val.npy 14 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/half.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_mid.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/half_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | annotations: 10 | train: 11 | - cc/coco_generated/annotations/train_mid.npy 12 | val: 13 | - cc/coco_generated/annotations/val.npy 14 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | annotations: 10 | train: 11 | - cc/coco_generated/annotations/train_small.npy 12 | val: 13 | - cc/coco_generated/annotations/val.npy 14 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small_fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_conceptual_captions/small_ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_vqa2/defaults.yaml 
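The `full`/`half`/`small`/`ten_pc`/`fifty_pc` variants above all follow one pattern: include the full pretraining config, then override only `annotations.train` with a subsampled imdb file. Roughly, `includes` merges the including file on top of the included ones, so its keys win and list values are replaced wholesale; a sketch of that merge semantics with plain OmegaConf (MMF's actual loader in `mmf/utils/configuration.py` additionally resolves the include paths):

```
from omegaconf import OmegaConf

# Sketch of the override pattern: the including config is merged on top of
# the included one; list values such as annotations.train are replaced, not
# concatenated, so only the subsampled annotation file remains.
base = OmegaConf.create({"dataset_config": {"masked_vqa2": {"annotations": {
    "train": ["vqa2/defaults/annotations/imdb_train2014_len_coco.npy"]}}}})
override = OmegaConf.create({"dataset_config": {"masked_vqa2": {"annotations": {
    "train": ["vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy"]}}}})
merged = OmegaConf.merge(base, override)
assert list(merged.dataset_config.masked_vqa2.annotations.train) == [
    "vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy"
]
```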
-------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_vqa2/fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_vqa2/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | annotations: 7 | train: 8 | - vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_vqa2/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../vilbert/configs/masked_vqa2/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/vilbert/masked_vqa2/ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/vilbert/configs/masked_vqa2/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | return_features_info: true 7 | use_images: false 8 | use_features: true 9 | features: 10 | train: 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014_len_coco_10_pc.npy 15 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_coco/defaults.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | annotations: 7 | train: 8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_coco/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/full_train_val.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_coco/pretrain_train_val.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_coco/ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_coco/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | annotations: 7 | train: 8 | - coco/defaults/annotations/imdb_karpathy_train_by_image_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_conceptual_captions/defaults.yaml 
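Several of the configs above switch to precomputed region features with `use_features: true` and a list of `.lmdb` paths. These LMDB files are key-value databases of per-image features. The key layout and serialization sketched below are assumptions for illustration only; MMF's real reader lives in `mmf/datasets/databases/features_database.py`:

```
import lmdb
import pickle

# Rough, assumption-laden peek at a feature database: entries are assumed to
# be keyed by image id and to hold a pickled record of precomputed features.
env = lmdb.open("coco_trainval2014.lmdb", readonly=True, lock=False)
with env.begin() as txn:
    with txn.cursor() as cursor:
        for key, value in cursor:
            record = pickle.loads(value)  # serialization format is an assumption
            print(key.decode(), type(record))
            break
```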
-------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_conceptual_captions/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/full_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | annotations: 10 | train: 11 | - cc/coco_generated/annotations/train_all.npy 12 | val: 13 | - cc/coco_generated/annotations/val.npy 14 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/half.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_mid.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/half_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | 10 | annotations: 11 | train: 12 | - cc/coco_generated/annotations/train_mid.npy 13 | val: 14 | - cc/coco_generated/annotations/val.npy 15 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small_coco_generated.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | zoo_requirements: 7 | - cc.coco_generated 8 | - cc.defaults 9 | 10 | annotations: 11 | train: 12 | - cc/coco_generated/annotations/train_small.npy 13 | val: 14 | - cc/coco_generated/annotations/val.npy 15 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small_fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | 
annotations: 7 | train: 8 | - cc/defaults/annotations/train_small_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_conceptual_captions/small_ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_conceptual_captions: 6 | annotations: 7 | train: 8 | - cc/defaults/annotations/train_small_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_vqa2/defaults.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/fifty_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_vqa2/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | annotations: 7 | train: 8 | - vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/full.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_vqa2/pretrain.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/full_train_val.yaml: -------------------------------------------------------------------------------- 1 | ../../../../visual_bert/configs/masked_vqa2/pretrain_train_val.yaml -------------------------------------------------------------------------------- /projects/pretrain_vl_right/configs/visual_bert/masked_vqa2/ten_pc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/visual_bert/configs/masked_vqa2/pretrain.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | annotations: 7 | train: 8 | - vqa2/defaults/annotations/imdb_train2014_len_coco_10_pc.npy 9 | -------------------------------------------------------------------------------- /projects/pythia/configs/masked_q_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./pythia.yaml 3 | 4 | evaluation: 5 | metrics: 6 | - accuracy 7 | 8 | training: 9 | early_stop: 10 | criteria: masked_q_vqa2/accuracy 11 | minimize: false 12 | -------------------------------------------------------------------------------- /projects/pythia/configs/textvqa/defaults.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - configs/datasets/textvqa/with_resnet.yaml 3 | optimizer: 4 | type: Adamax 5 | params: 6 | lr: 0.005 7 | 8 | evaluation: 9 | metrics: 10 | - vqa_accuracy 11 | 12 | training: 13 | clip_norm_mode: all 14 | clip_gradients: false 15 | max_grad_l2_norm: 0.25 16 | lr_scheduler: true 17 | lr_steps: 18 | - 14000 19 | lr_ratio: 0.01 20 | use_warmup: true 21 | warmup_factor: 0.2 22 | warmup_iterations: 1000 23 | max_updates: 24000 24 | batch_size: 128 25 | num_workers: 7 26 | task_size_proportional_sampling: true 27 | early_stop: 28 | criteria: 
textvqa/vqa_accuracy 29 | minimize: false 30 | 31 | checkpoint: 32 | pretrained_state_mapping: 33 | text_embeddings: text_embeddings 34 | image_feature_encoders: image_feature_encoders 35 | image_feature_embeddings_list: image_feature_embeddings_list 36 | image_text_multi_modal_combine_layer: image_text_multi_modal_combine_layer 37 | -------------------------------------------------------------------------------- /projects/pythia/configs/visual_genome/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | visual_genome: 3 | return_scene_graph: false 4 | return_objects: false 5 | return_relationships: false 6 | return_features_info: false 7 | no_unk: true 8 | 9 | evaluation: 10 | metrics: 11 | - vqa_accuracy 12 | 13 | training: 14 | early_stop: 15 | criteria: visual_genome/vqa_accuracy 16 | minimize: false 17 | -------------------------------------------------------------------------------- /projects/pythia/configs/vizwiz/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: Adamax 3 | params: 4 | lr: 0.005 5 | 6 | evaluation: 7 | metrics: 8 | - vqa_accuracy 9 | 10 | training: 11 | clip_norm_mode: all 12 | clip_gradients: true 13 | max_grad_l2_norm: 0.25 14 | lr_scheduler: true 15 | lr_steps: 16 | - 14000 17 | lr_ratio: 0.01 18 | use_warmup: true 19 | warmup_factor: 0.2 20 | warmup_iterations: 1000 21 | max_updates: 24000 22 | batch_size: 128 23 | num_workers: 7 24 | task_size_proportional_sampling: true 25 | early_stop: 26 | criteria: vizwiz/vqa_accuracy 27 | minimize: false 28 | 29 | checkpoint: 30 | pretrained_state_mapping: 31 | word_embedding: word_embedding 32 | text_embeddings: text_embeddings 33 | image_feature_encoders: image_feature_encoders 34 | image_feature_embeddings_list: image_feature_embeddings_list 35 | image_text_multi_modal_combine_layer: image_text_multi_modal_combine_layer 36 | -------------------------------------------------------------------------------- /projects/pythia/configs/vqa2/debug.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/trainval2014.lmdb 11 | val: 12 | - coco/defaults/features/trainval2014.lmdb 13 | annotations: 14 | train: 15 | - vqa2/defaults/annotations/imdb_debug.npy 16 | val: 17 | - vqa2/defaults/annotations/imdb_debug.npy 18 | -------------------------------------------------------------------------------- /projects/pythia/configs/vqa2/resnet_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | zoo_requirements: 9 | - coco.resnet152 10 | - vqa2.defaults 11 | features: 12 | train: 13 | - coco/resnet152/features/trainval2014.lmdb 14 | val: 15 | - coco/resnet152/features/trainval2014.lmdb 16 | test: 17 | - coco/resnet152/features/test2015.lmdb 18 | model_config: 19 | pythia: 20 | image_feature_encodings: 21 | - type: default 22 | params: {} 23 | -------------------------------------------------------------------------------- /projects/pythia/configs/vqa2/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: 
true 8 | features: 9 | train: 10 | - coco/defaults/features/trainval2014.lmdb,coco/resnet152/features/trainval2014.lmdb 11 | - coco/defaults/features/trainval2014.lmdb,coco/resnet152/features/trainval2014.lmdb 12 | val: 13 | - coco/defaults/features/trainval2014.lmdb,coco/resnet152/features/trainval2014.lmdb 14 | test: 15 | - coco/defaults/features/test2015.lmdb,coco/resnet152/features/test2015.lmdb 16 | annotations: 17 | train: 18 | - vqa2/defaults/annotations/imdb_train2014.npy 19 | - vqa2/defaults/annotations/imdb_val2014.npy 20 | val: 21 | - vqa2/defaults/annotations/imdb_val2014.npy 22 | -------------------------------------------------------------------------------- /projects/pythia/configs/vqa2/train_val_resnet_only.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./resnet_only.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/resnet152/features/trainval2014.lmdb 11 | - coco/resnet152/features/trainval2014.lmdb 12 | val: 13 | - coco/resnet152/features/trainval2014.lmdb 14 | test: 15 | - coco/resnet152/features/test2015.lmdb 16 | annotations: 17 | train: 18 | - vqa2/defaults/annotations/imdb_train2014.npy 19 | - vqa2/defaults/annotations/imdb_valminusminival2014.npy 20 | val: 21 | - vqa2/defaults/annotations/imdb_minival2014.npy 22 | -------------------------------------------------------------------------------- /projects/unit/README.md: -------------------------------------------------------------------------------- 1 | # UniT: Multimodal Multitask Learning with a Unified Transformer 2 | 3 | This repository contains the code for the UniT model from the following paper, released as part of MMF: 4 | 5 | - R. Hu, A. Singh. _UniT: Multimodal Multitask Learning with a Unified Transformer_. arXiv preprint arXiv:2102.10772, 2021 ([PDF](https://arxiv.org/pdf/2102.10772.pdf)) 6 | 7 | ``` 8 | @article{hu2021unit, 9 | title={UniT: Multimodal multitask learning with a unified transformer}, 10 | author={Hu, Ronghang and Singh, Amanpreet}, 11 | journal={arXiv preprint arXiv:2102.10772}, 12 | year={2021} 13 | } 14 | ``` 15 | 16 | Please see [https://mmf.sh/docs/projects/unit](https://mmf.sh/docs/projects/unit) for more details on how to use the UniT model.
17 | -------------------------------------------------------------------------------- /projects/unit/configs/all_8_datasets/separate_dec.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | share_decoders: false 8 | 9 | optimizer: 10 | type: adam_w # HuggingFace transformer's AdamW 11 | -------------------------------------------------------------------------------- /projects/unit/configs/all_8_datasets/shared_dec_without_task_embedding.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | use_task_embedding_in_img_encoder: false 8 | use_task_embedding_in_lang_encoder: false 9 | -------------------------------------------------------------------------------- /projects/unit/configs/coco/single_task_without_task_embedding.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./single_task.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | use_task_embedding_in_img_encoder: false 8 | use_task_embedding_in_lang_encoder: false 9 | -------------------------------------------------------------------------------- /projects/unit/configs/coco_vg_vqa2/separate_dec.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | share_decoders: false 8 | 9 | optimizer: 10 | type: adam_w # HuggingFace transformer's AdamW 11 | -------------------------------------------------------------------------------- /projects/unit/configs/coco_vqa2/separate_dec.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | share_decoders: false 8 | 9 | optimizer: 10 | type: adam_w # HuggingFace transformer's AdamW 11 | -------------------------------------------------------------------------------- /projects/unit/configs/vg_vqa2/separate_dec.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./shared_dec.yaml 3 | 4 | model_config: 5 | unit: 6 | base_args: 7 | share_decoders: false 8 | 9 | optimizer: 10 | type: adam_w # HuggingFace transformer's AdamW 11 | -------------------------------------------------------------------------------- /projects/unit/configs/visual_entailment_dataset_cfg.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | visual_entailment: 3 | zoo_requirements: 4 | - visual_entailment.defaults 5 | - flickr30k.defaults 6 | use_features: false 7 | use_images: true 8 | processors: 9 | image_processor: 10 | type: torchvision_transforms 11 | params: 12 | transforms: 13 | - type: ResizeShortest 14 | params: 15 | min_size: 800 16 | max_size: 1333 17 | - ToTensor 18 | - type: Normalize 19 | params: 20 | mean: [0.485, 0.456, 0.406] 21 | std: [0.229, 0.224, 0.225] 22 | text_processor: 23 | type: bert_tokenizer 24 | params: 25 | tokenizer_config: 26 | type: bert-base-uncased 27 | params: 28 | do_lower_case: true 29 | mask_probability: 0 30 | max_seq_length: 25 31 | -------------------------------------------------------------------------------- /projects/uniter/README.md: -------------------------------------------------------------------------------- 1 | # UNITER 2 | 3 | This 
repository contains the code for the PyTorch implementation of the UNITER model, originally released in this [repo](https://github.com/ChenRocks/UNITER/). Please cite the following paper if you are using the UNITER model from MMF: 4 | 5 | * Chen, Y.-C., Li, L., Yu, L., Kholy, A. E., Ahmed, F., Gan, 6 | Z., Cheng, Y., and Liu, J. *UNITER: Universal image-text representation learning.* In European Conference on 7 | Computer Vision, 2020. ([arXiv](https://arxiv.org/pdf/1909.11740)) 8 | ``` 9 | @inproceedings{chen2020uniter, 10 | title={Uniter: Universal image-text representation learning}, 11 | author={Chen, Yen-Chun and Li, Linjie and Yu, Licheng and Kholy, Ahmed El and Ahmed, Faisal and Gan, Zhe and Cheng, Yu and Liu, Jingjing}, 12 | booktitle={ECCV}, 13 | year={2020} 14 | } 15 | ``` 16 | 17 | 18 | Please see [https://mmf.sh/docs/projects/uniter](https://mmf.sh/docs/projects/uniter) for more details on how to use the UNITER model. 19 | -------------------------------------------------------------------------------- /projects/vilbert/configs/hateful_memes/direct.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/vilbert/configs/hateful_memes/from_cc.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | checkpoint: 5 | resume_pretrained: true 6 | resume_zoo: vilbert.pretrained.cc.original 7 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | vilbert: 3 | training_head_type: pretraining 4 | 5 | dataset_config: 6 | masked_coco: 7 | return_features_info: true 8 | use_image_feature_masks: true 9 | 10 | optimizer: 11 | type: adam_w 12 | params: 13 | lr: 5e-5 14 | eps: 1e-8 15 | 16 | scheduler: 17 | type: warmup_linear 18 | params: 19 | num_warmup_steps: 1000 20 | num_training_steps: 11000 21 | 22 | training: 23 | batch_size: 480 24 | lr_scheduler: true 25 | # Don't forget to update schedule_attributes if you update this 26 | max_updates: 11000 27 | find_unused_parameters: true 28 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_coco/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_coco/pretrain_train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | return_features_info: true 7 | use_images: false 8 | use_features: true 9 | features: 10 | train: 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | - coco/defaults/features/coco_trainval2014.lmdb 13 | annotations: 14 | train: 15 | - coco/defaults/annotations/imdb_karpathy_train_by_image.npy 16 | - coco/defaults/annotations/imdb_karpathy_val_by_image.npy 17 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | vilbert: 3 | training_head_type:
pretraining 4 | 5 | dataset_config: 6 | masked_conceptual_captions: 7 | return_features_info: true 8 | use_image_feature_masks: true 9 | 10 | optimizer: 11 | type: adam_w 12 | params: 13 | lr: 5e-5 14 | eps: 1e-8 15 | 16 | scheduler: 17 | type: warmup_linear 18 | params: 19 | num_warmup_steps: 1000 20 | num_training_steps: 11000 21 | 22 | training: 23 | batch_size: 480 24 | lr_scheduler: true 25 | # Don't forget to update schedule_attributes if you update this 26 | max_updates: 11000 27 | find_unused_parameters: true 28 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_conceptual_captions/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | vilbert: 3 | training_head_type: pretraining 4 | 5 | dataset_config: 6 | masked_vqa2: 7 | annotations: 8 | train: 9 | - vqa2/defaults/annotations/imdb_train2014.npy 10 | return_features_info: true 11 | use_image_feature_masks: true 12 | 13 | optimizer: 14 | type: adam_w 15 | params: 16 | lr: 5e-5 17 | eps: 1e-8 18 | 19 | scheduler: 20 | type: warmup_linear 21 | params: 22 | num_warmup_steps: 1000 23 | num_training_steps: 11000 24 | 25 | training: 26 | batch_size: 480 27 | lr_scheduler: true 28 | # Don't forget to update schedule_attributes if you update this 29 | max_updates: 11000 30 | find_unused_parameters: true 31 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_vqa2/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/vilbert/configs/masked_vqa2/pretrain_train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/coco_trainval2014.lmdb 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014.npy 15 | - vqa2/defaults/annotations/imdb_val2014.npy 16 | return_features_info: true 17 | use_image_feature_masks: true 18 | -------------------------------------------------------------------------------- /projects/vilbert/configs/mmimdb/pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/projects/vilbert/configs/mmimdb/pretrain.yaml -------------------------------------------------------------------------------- /projects/vilbert/configs/vqa2/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/coco_trainval2014.lmdb 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014.npy 15 | - vqa2/defaults/annotations/imdb_val2014.npy 16 | 
return_features_info: true 17 | -------------------------------------------------------------------------------- /projects/vilt/README.md: -------------------------------------------------------------------------------- 1 | # ViLT 2 | 3 | This repository contains the code for the PyTorch implementation of the ViLT model, originally released in this [repo](https://github.com/dandelin/ViLT). Please cite the following paper if you are using the ViLT model from MMF: 4 | 5 | * Wonjae Kim, Bokyung Son, and Ildoo Kim. 2021. *ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision*. In the 38th International Conference on Machine Learning (ICML). ([arXiv](https://arxiv.org/pdf/2102.03334)) 6 | ``` 7 | @misc{kim2021vilt, 8 | title={ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision}, 9 | author={Wonjae Kim and Bokyung Son and Ildoo Kim}, 10 | year={2021}, 11 | eprint={2102.03334}, 12 | archivePrefix={arXiv}, 13 | primaryClass={stat.ML} 14 | } 15 | ``` 16 | 17 | Please see [https://mmf.sh/docs/projects/vilt](https://mmf.sh/docs/projects/vilt) for more details on how to use the ViLT model. 18 | -------------------------------------------------------------------------------- /projects/vilt/configs/vqa2/vit_b16_224.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/unit/configs/vqa2_dataset_cfg.yaml 3 | - ./defaults.yaml 4 | 5 | other_configs: 6 | image_w: 224 7 | image_h: 224 8 | hidden_dim: 768 9 | 10 | model_config: 11 | vilt: 12 | image_encoder: 13 | type: vit 14 | params: 15 | random_init: False 16 | pretrained_model_name: google/vit-base-patch16-224 17 | image_size: 18 | - ${other_configs.image_w} 19 | - ${other_configs.image_h} 20 | hidden_dim: ${other_configs.hidden_dim} 21 | pretrained_model: vit_base_patch16_224 22 | mlp_dim: 3072 23 | 24 | text_embeddings: 25 | type: vilt_text_embedding 26 | bert_model_name: bert-base-uncased 27 | hidden_dim: ${other_configs.hidden_dim} 28 | hidden_size: 768 29 | max_position_embeddings: 512 30 | random_init: False 31 | -------------------------------------------------------------------------------- /projects/vilt/configs/vqa2/vit_b32_384.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../projects/unit/configs/vqa2_dataset_cfg.yaml 3 | - ./defaults.yaml 4 | 5 | other_configs: 6 | image_w: 384 7 | image_h: 384 8 | hidden_dim: 768 9 | 10 | model_config: 11 | vilt: 12 | image_encoder: 13 | type: vit 14 | params: 15 | random_init: False 16 | pretrained_model_name: google/vit-base-patch32-384 17 | image_size: 18 | - ${other_configs.image_w} 19 | - ${other_configs.image_h} 20 | hidden_dim: ${other_configs.hidden_dim} 21 | pretrained_model: vit_base_patch32_384 22 | mlp_dim: 3072 23 | 24 | text_embeddings: 25 | type: vilt_text_embedding 26 | bert_model_name: bert-base-uncased 27 | hidden_dim: ${other_configs.hidden_dim} 28 | hidden_size: 768 29 | max_position_embeddings: 512 30 | random_init: False 31 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/hateful_memes/direct.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | training: 5 | batch_size: 128 6 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/hateful_memes/from_coco.yaml:
-------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | checkpoint: 5 | resume_pretrained: true 6 | resume_zoo: visual_bert.pretrained.coco 7 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/localized_narratives/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | visual_bert: 3 | hidden_size: 768 4 | hidden_dropout_prob: 0.1 5 | training_head_type: classification 6 | num_labels: 3129 7 | 8 | dataset_config: 9 | masked_localized_narratives: 10 | return_features_info: true 11 | 12 | optimizer: 13 | type: adam_w 14 | params: 15 | lr: 5e-5 16 | eps: 1e-8 17 | 18 | scheduler: 19 | type: warmup_linear 20 | params: 21 | num_warmup_steps: 1000 22 | num_training_steps: 11000 23 | 24 | training: 25 | batch_size: 32 26 | lr_scheduler: true 27 | num_workers: 0 28 | # Don't forget to update schedule_attributes if you update this 29 | max_updates: 88000 30 | find_unused_parameters: true 31 | 32 | checkpoint: 33 | pretrained_state_mapping: 34 | model.bert: model.bert 35 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/localized_narratives/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ../../../../mmf/configs/datasets/coco2017/masked.yaml 3 | - ../../../../mmf/configs/datasets/flickr30k/masked.yaml 4 | - ../../../../mmf/configs/datasets/localized_narratives/masked.yaml 5 | 6 | model_config: 7 | visual_bert: 8 | training_head_type: pretraining 9 | 10 | optimizer: 11 | type: adam_w 12 | params: 13 | lr: 5e-5 14 | eps: 1e-8 15 | 16 | scheduler: 17 | type: warmup_linear 18 | params: 19 | num_warmup_steps: 1000 20 | num_training_steps: 11000 21 | 22 | training: 23 | batch_size: 32 24 | lr_scheduler: true 25 | num_workers: 0 26 | # Don't forget to update schedule_attributes if you update this 27 | max_updates: 88000 28 | find_unused_parameters: true 29 | 30 | checkpoint: 31 | pretrained_state_mapping: 32 | model.bert: model.bert 33 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_coco/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_coco: 3 | return_features_info: true 4 | 5 | optimizer: 6 | type: adam_w 7 | params: 8 | lr: 5e-5 9 | eps: 1e-8 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 1000 15 | num_training_steps: 11000 16 | 17 | training: 18 | batch_size: 480 19 | lr_scheduler: true 20 | # Don't forget to update schedule_attributes if you update this 21 | max_updates: 11000 22 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_coco/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_coco/pretrain_train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_coco: 6 | return_features_info: true 7 | use_images: false 8 | use_features: true 9 | features: 10 | train: 11 | - coco/defaults/features/trainval2014.lmdb 12 | - 
coco/defaults/features/trainval2014.lmdb 13 | annotations: 14 | train: 15 | - coco/defaults/annotations/imdb_karpathy_train_by_image.npy 16 | - coco/defaults/annotations/imdb_karpathy_val_by_image.npy 17 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_conceptual_captions/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_conceptual_captions: 3 | return_features_info: true 4 | 5 | optimizer: 6 | type: adam_w 7 | params: 8 | lr: 5e-5 9 | eps: 1e-8 10 | 11 | scheduler: 12 | type: warmup_linear 13 | params: 14 | num_warmup_steps: 1000 15 | num_training_steps: 11000 16 | 17 | training: 18 | batch_size: 480 19 | lr_scheduler: true 20 | # Don't forget to update schedule_attributes if you update this 21 | max_updates: 11000 22 | find_unused_parameters: true 23 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_conceptual_captions/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_gqa/defaults.yaml: -------------------------------------------------------------------------------- 1 | optimizer: 2 | type: adam_w 3 | params: 4 | lr: 5e-5 5 | eps: 1e-8 6 | 7 | scheduler: 8 | type: warmup_linear 9 | params: 10 | num_warmup_steps: 2000 11 | num_training_steps: 88000 12 | 13 | training: 14 | batch_size: 480 15 | lr_scheduler: true 16 | # Don't forget to update schedule_attributes if you update this 17 | max_updates: 88000 18 | find_unused_parameters: true 19 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_sbu/defaults.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | visual_bert: 3 | bert_model_name: bert-base-uncased 4 | training_head_type: pretraining 5 | visual_embedding_dim: 2048 6 | special_visual_initialize: true 7 | hard_cap_seq_len: null 8 | cut_first: text 9 | embedding_strategy: plain 10 | bypass_transformer: false 11 | output_attentions: false 12 | output_hidden_states: false 13 | text_only: false 14 | random_initialize: false 15 | 16 | dataset_config: 17 | masked_sbu: 18 | return_features_info: true 19 | 20 | optimizer: 21 | type: adam_w 22 | params: 23 | lr: 5e-5 24 | eps: 1e-8 25 | 26 | scheduler: 27 | type: warmup_linear 28 | params: 29 | num_warmup_steps: 1000 30 | num_training_steps: 11000 31 | 32 | training: 33 | batch_size: 480 34 | lr_scheduler: true 35 | # Don't forget to update schedule_attributes if you update this 36 | max_updates: 11000 37 | find_unused_parameters: true 38 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_sbu/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_vqa2/defaults.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | masked_vqa2: 3 | annotations: 4 | train: 5 | - vqa2/defaults/annotations/imdb_train2014.npy 6 | return_features_info: true 7 | 8 | optimizer: 9 | type: adam_w 10 | params: 11 | 
lr: 5e-5 12 | eps: 1e-8 13 | 14 | scheduler: 15 | type: warmup_linear 16 | params: 17 | num_warmup_steps: 1000 18 | num_training_steps: 11000 19 | 20 | training: 21 | batch_size: 480 22 | lr_scheduler: true 23 | # Don't forget to update schedule_attributes if you update this 24 | max_updates: 11000 25 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_vqa2/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/masked_vqa2/pretrain_train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/coco_trainval2014.lmdb 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014.npy 15 | - vqa2/defaults/annotations/imdb_val2014.npy 16 | return_features_info: true 17 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/mmimdb/pretrain.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | masked_mmimdb: 6 | return_features_info: true 7 | 8 | model_config: 9 | visual_bert: 10 | training_head_type: pretraining 11 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/vizwiz/train_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | vizwiz: 3 | return_features_info: true 4 | use_images: false 5 | use_features: true 6 | features: 7 | train: 8 | - vizwiz/v2019/features/detectron.lmdb 9 | - vizwiz/v2019/features/detectron.lmdb 10 | val: 11 | - vizwiz/v2019/features/detectron.lmdb 12 | test: 13 | - vizwiz/v2019/features/detectron.lmdb 14 | annotations: 15 | train: 16 | - datasets/vizwiz/imdbs/imdb_vizwiz_train.npy 17 | - datasets/vizwiz/imdbs/imdb_vizwiz_val.npy 18 | processors: 19 | # Stop fasttext from loading by overriding the context_processor 20 | context_processor: 21 | type: simple_word 22 | params: {} 23 | text_processor: 24 | type: bert_tokenizer 25 | params: 26 | tokenizer_config: 27 | type: bert-base-uncased 28 | params: 29 | do_lower_case: true 30 | mask_probability: 0 31 | max_seq_length: 128 32 | -------------------------------------------------------------------------------- /projects/visual_bert/configs/vqa2/train_val.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | 4 | dataset_config: 5 | vqa2: 6 | use_images: false 7 | use_features: true 8 | features: 9 | train: 10 | - coco/defaults/features/coco_trainval2014.lmdb 11 | - coco/defaults/features/coco_trainval2014.lmdb 12 | annotations: 13 | train: 14 | - vqa2/defaults/annotations/imdb_train2014.npy 15 | - vqa2/defaults/annotations/imdb_val2014.npy 16 | return_features_info: true 17 | processors: 18 | text_processor: 19 | type: bert_tokenizer 20 | params: 21 | tokenizer_config: 22 | type: bert-base-uncased 23 | params: 24 | do_lower_case: true 25 | mask_probability: 0 26 | max_seq_length: 128 27 | 
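The `bert_tokenizer` processor configured above (and in several earlier configs) is a standard BERT wordpiece tokenizer: uncased, no token masking (`mask_probability: 0`), sequences truncated and padded to `max_seq_length`. Approximately, in terms of the Hugging Face API it builds on (MMF's own wrapper lives in `mmf/datasets/processors/bert_processors.py`):

```
from transformers import BertTokenizer

# Approximate equivalent of the processor config above: bert-base-uncased,
# do_lower_case=true, max_seq_length=128, mask_probability=0 (no masking).
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
encoded = tokenizer(
    "is there food in the image?",
    max_length=128,
    padding="max_length",
    truncation=True,
)
print(len(encoded["input_ids"]))  # 128: truncated/padded to max_seq_length
```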
-------------------------------------------------------------------------------- /projects/visual_bert/configs/vqa2/with_raw_images.yaml: -------------------------------------------------------------------------------- 1 | includes: 2 | - ./defaults.yaml 3 | - ../../../../mmf/configs/datasets/vqa2/with_raw_images.yaml 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.11.0 2 | torchaudio==0.11.0 3 | torchvision==0.12.0 4 | numpy>=1.16.6, <=1.21.4 5 | tqdm>=4.43.0,<4.50.0 6 | torchtext==0.12.0 7 | GitPython==3.1.30 8 | requests==2.23.0 9 | fasttext==0.9.1 10 | nltk==3.6.6 11 | editdistance==0.5.3 12 | transformers>=3.4.0, <=4.10.1 13 | sklearn==0.0 14 | omegaconf>=2.0.6, <=2.1 15 | lmdb==0.98 16 | termcolor==1.1.0 17 | iopath==0.1.8 18 | datasets==1.2.1 19 | matplotlib==3.3.4 20 | pycocotools==2.0.2 21 | ftfy==5.8 22 | pytorch-lightning==1.6.0 23 | psutil 24 | pillow==9.3.0 25 | sentencepiece 26 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from mmf.utils.patch import patch_transformers 3 | 4 | 5 | patch_transformers() 6 | -------------------------------------------------------------------------------- /tests/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/common/test_meter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import unittest 3 | 4 | import torch 5 | from mmf.common.meter import Meter 6 | from mmf.common.report import Report 7 | from mmf.common.sample import SampleList 8 | 9 | 10 | class TestMeter(unittest.TestCase): 11 | def test_meter_update_from_report(self): 12 | meter = Meter() 13 | prepared_batch = SampleList( 14 | {"targets": torch.tensor([1, 2, 3, 4]), "dataset_type": "val"} 15 | ) 16 | for idx in range(5): 17 | model_output = { 18 | "scores": torch.tensor([0, 1, 2, 3]), 19 | "losses": {"loss": float(idx)}, 20 | } 21 | report = Report(prepared_batch, model_output) 22 | meter.update_from_report(report) 23 | 24 | self.assertEqual(meter.loss.global_avg, 2.0) 25 | self.assertEqual(meter.loss.avg, 2.0) 26 | -------------------------------------------------------------------------------- /tests/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/data/user_dir/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # flake8: noqa: F401 3 | 4 | from . 
import datasets, models 5 | -------------------------------------------------------------------------------- /tests/data/user_dir/configs/always_one.yaml: -------------------------------------------------------------------------------- 1 | dataset_config: 2 | always_one: {} 3 | -------------------------------------------------------------------------------- /tests/data/user_dir/configs/experiment.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | simple: 3 | losses: 4 | - type: cross_entropy 5 | 6 | optimizer: 7 | type: SGD 8 | params: 9 | lr: 1e-3 10 | 11 | evaluation: 12 | metrics: 13 | - accuracy 14 | 15 | training: 16 | batch_size: 8 17 | lr_scheduler: false 18 | max_updates: 50 19 | early_stop: 20 | criteria: always_one/accuracy 21 | minimize: false 22 | log_format: json 23 | -------------------------------------------------------------------------------- /tests/data/user_dir/configs/simple.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | simple: 3 | in_dim: 1 4 | data_item_key: input 5 | -------------------------------------------------------------------------------- /tests/data/user_dir/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # flake8: noqa: F401 3 | 4 | from . import always_one 5 | -------------------------------------------------------------------------------- /tests/data/user_dir/datasets/always_one.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from mmf.common.registry import registry 4 | from mmf.datasets.base_dataset_builder import BaseDatasetBuilder 5 | from tests.test_utils import NumbersDataset 6 | 7 | 8 | DATASET_LEN = 20 9 | 10 | 11 | @registry.register_builder("always_one") 12 | class AlwaysOneBuilder(BaseDatasetBuilder): 13 | def __init__(self): 14 | super().__init__("always_one") 15 | 16 | def build(self, *args, **kwargs): 17 | pass 18 | 19 | @classmethod 20 | def config_path(cls): 21 | return "configs/always_one.yaml" 22 | 23 | def load(self, config, dataset_type="train", *args, **kwargs): 24 | dataset = NumbersDataset(DATASET_LEN, data_item_key="input", always_one=True) 25 | dataset.dataset_name = self.dataset_name 26 | dataset.dataset_type = dataset_type 27 | return dataset 28 | -------------------------------------------------------------------------------- /tests/data/user_dir/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # flake8: noqa: F401 3 | 4 | from . import simple 5 | -------------------------------------------------------------------------------- /tests/data/user_dir/models/simple.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 3 | from mmf.common.registry import registry 4 | from tests.test_utils import SimpleModel 5 | 6 | 7 | @registry.register_model("simple") 8 | class CustomSimpleModel(SimpleModel): 9 | @classmethod 10 | def config_path(cls): 11 | return "configs/simple.yaml" 12 | 13 | def forward(self, sample_list): 14 | return {"scores": self.classifier(sample_list.input)} 15 | -------------------------------------------------------------------------------- /tests/data/vocab.txt: -------------------------------------------------------------------------------- 1 | a 2 | man 3 | with 4 | red 5 | helmet 6 | on 7 | small 8 | moped 9 | dirt 10 | road 11 | riding 12 | motor 13 | bike 14 | the 15 | countryside 16 | back 17 | of 18 | motorcycle 19 | -------------------------------------------------------------------------------- /tests/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/datasets/test_prediction_processors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import unittest 4 | 5 | import torch 6 | from mmf.common.report import Report 7 | from mmf.common.sample import SampleList 8 | from mmf.datasets.processors.prediction_processors import ArgMaxPredictionProcessor 9 | 10 | 11 | class TestDatasetProcessors(unittest.TestCase): 12 | def setUp(self): 13 | torch.manual_seed(1234) 14 | 15 | def test_argmax_prediction_processor(self): 16 | processor = ArgMaxPredictionProcessor(config={}) 17 | batch = SampleList({"id": torch.tensor([1, 2, 3, 4, 5], dtype=torch.long)}) 18 | model_output = {"scores": torch.rand(5, 4)} 19 | report = Report(batch, model_output) 20 | 21 | predictions = processor(report) 22 | 23 | expected_answers = [1, 1, 2, 1, 3] 24 | expected = [] 25 | for idx, answer in enumerate(expected_answers): 26 | expected.append({"id": idx + 1, "answer": answer}) 27 | 28 | self.assertEqual(predictions, expected) 29 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/models/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/models/test_albef.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import unittest 3 | 4 | import torch 5 | from mmf.models.albef.vit import AlbefVitEncoder 6 | from omegaconf import OmegaConf 7 | from tests.test_utils import setup_proxy 8 | from torch import nn 9 | 10 | 11 | class TestAlbefEncoders(unittest.TestCase): 12 | def setUp(self): 13 | setup_proxy() 14 | 15 | def _test_init(self, cls, **params): 16 | encoder = cls.from_params(**params) 17 | self.assertTrue(isinstance(encoder, nn.Module)) 18 | 19 | def test_vision_transformer(self): 20 | config = OmegaConf.structured(AlbefVitEncoder.Config()) 21 | encoder = AlbefVitEncoder(config) 22 | x = torch.rand((1, 3, 224, 224)) 23 | output = encoder(x) 24 | self.assertEqual(output.size(-1), config.out_dim) 25 | -------------------------------------------------------------------------------- /tests/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/modules/test_hf_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import unittest 4 | 5 | from mmf.modules.hf_layers import replace_with_jit, undo_replace_with_jit 6 | 7 | try: 8 | from transformers3.modeling_bert import BertSelfAttention 9 | except ImportError: 10 | from transformers.modeling_bert import BertSelfAttention 11 | 12 | 13 | class TestHFLayers(unittest.TestCase): 14 | def test_undo_replace_with_jit(self): 15 | original_function = BertSelfAttention.forward 16 | replace_with_jit() 17 | undo_replace_with_jit() 18 | self.assertTrue(BertSelfAttention.forward is original_function) 19 | -------------------------------------------------------------------------------- /tests/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/trainers/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/trainers/lightning/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/trainers/test_device.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
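# Verifies TrainerDeviceMixin seed and device configuration: expects "cuda:0" when CUDA is available and the CPU device otherwise.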
2 | import unittest 3 | 4 | import torch 5 | from mmf.trainers.core.device import TrainerDeviceMixin 6 | from mmf.utils.general import get_current_device 7 | from omegaconf import OmegaConf 8 | 9 | 10 | class DeviceMock(TrainerDeviceMixin): 11 | def __init__(self, config): 12 | self.config = config 13 | 14 | 15 | class TestDevice(unittest.TestCase): 16 | def test_current_device(self): 17 | config = { 18 | "training": {"seed": 1, "cudnn_benchmark": False}, 19 | "distributed": {"init_method": None}, 20 | } 21 | deviceMock = DeviceMock(OmegaConf.create(config)) 22 | deviceMock.configure_seed() 23 | deviceMock.configure_device() 24 | device = get_current_device() 25 | if torch.cuda.is_available(): 26 | self.assertEqual(device, "cuda:0") 27 | else: 28 | self.assertEqual(device, torch.device(type="cpu")) 29 | -------------------------------------------------------------------------------- /tests/trainers/test_eval_loop.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import unittest 4 | from unittest.mock import MagicMock, patch 5 | 6 | import torch 7 | from tests.trainers.test_utils import get_config_with_defaults, get_mmf_trainer 8 | 9 | 10 | class TestEvalLoop(unittest.TestCase): 11 | def setUp(self): 12 | torch.manual_seed(2) 13 | 14 | @patch( 15 | "mmf.common.test_reporter.PathManager", 16 | return_value=MagicMock(return_value=None), 17 | ) 18 | @patch("mmf.common.test_reporter.get_mmf_env", return_value="") 19 | def test_eval_loop(self, mock_get_mmf_env, mock_path_manager): 20 | config = get_config_with_defaults( 21 | {"training": {"max_updates": 2, "max_epochs": 2}} 22 | ) 23 | trainer = get_mmf_trainer(config=config) 24 | combined_report, meter = trainer.evaluation_loop("val") 25 | self.assertAlmostEqual(combined_report["losses"]["loss"], 493377.5312) 26 | self.assertAlmostEqual(combined_report["logits"].item(), -0.2379742, 6) 27 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tests/utils/test_distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import unittest 3 | 4 | import mmf.utils.distributed as distributed 5 | 6 | 7 | class TestUtilsDistributed(unittest.TestCase): 8 | def test_object_byte_tensor_conversion(self): 9 | test_obj = [1, "2", {3: 4}, [5]] 10 | test_obj_bytes = distributed.object_to_byte_tensor(test_obj) 11 | test_obj_dec = distributed.byte_tensor_to_object(test_obj_bytes) 12 | self.assertEqual(test_obj_dec, test_obj) 13 | -------------------------------------------------------------------------------- /tests/utils/test_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
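# Verifies safecopy_modules/restore_saved_modules: a function monkey-patched after being saved to the registry must be restorable to its original implementation.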
2 | 3 | import unittest 4 | 5 | from mmf.common.registry import registry 6 | from mmf.utils.patch import ( 7 | ORIGINAL_PATCH_FUNCTIONS_KEY, 8 | restore_saved_modules, 9 | safecopy_modules, 10 | ) 11 | 12 | 13 | class TestClass: 14 | @staticmethod 15 | def test_function(): 16 | return True 17 | 18 | 19 | class TestUtilsPatch(unittest.TestCase): 20 | def setUp(self): 21 | registry.register(ORIGINAL_PATCH_FUNCTIONS_KEY, {}) 22 | 23 | def test_safecopy_modules(self): 24 | safecopy_modules(["TestClass.test_function"], {"TestClass": TestClass}) 25 | original_functions = registry.get(ORIGINAL_PATCH_FUNCTIONS_KEY) 26 | self.assertTrue("TestClass.test_function" in original_functions) 27 | 28 | TestClass.test_function = lambda: False 29 | restore_saved_modules({"TestClass": TestClass}) 30 | self.assertTrue(TestClass.test_function()) 31 | -------------------------------------------------------------------------------- /tests/utils/test_timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import time 3 | import unittest 4 | 5 | from mmf.utils.timer import Timer 6 | 7 | 8 | class TestUtilsTimer(unittest.TestCase): 9 | def test_get_current(self): 10 | timer = Timer() 11 | expected = 0 12 | 13 | self.assertEqual(int(timer.get_current().split("ms")[0]), expected) 14 | 15 | def test_reset(self): 16 | timer = Timer() 17 | time.sleep(2) 18 | timer.reset() 19 | expected = 0 20 | 21 | self.assertEqual(int(timer.get_current().split("ms")[0]), expected) 22 | 23 | def test_get_time_since_start(self): 24 | timer = Timer() 25 | time.sleep(2) 26 | expected = 2 27 | 28 | self.assertEqual(expected, int(timer.get_time_since_start().split("s")[0])) 29 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tools/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /tools/scripts/bert/extract_bert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: extract_bert.sh <imdb_path> <out_path> <n_groups> 3 | # Spawns one background extraction process per group id in [0, n_groups - 1]. 4 | N_REM=`expr $3 - 1` 5 | 6 | for i in $(seq 0 $N_REM); do 7 | python tools/scripts/bert/extract_bert_embeddings.py --imdb_path $1 --out_path $2 --group_id $i --n_groups $3 & 8 | done 9 | -------------------------------------------------------------------------------- /tools/scripts/gqa/README.md: -------------------------------------------------------------------------------- 1 | # Conversion of GQA to VQA format 2 | 3 | * Download the GQA datasets and store them in the layout expected by the conversion script 4 | * Download the 300D GloVe embeddings file 5 | * Run the script from the root of the repo, substituting the relevant paths: 6 | 7 | ``` 8 | python tools/scripts/gqa/convert_gqa_to_vqa.py --gqa_dir <gqa_dir> --out_dir <out_dir> 9 | ``` 10 | -------------------------------------------------------------------------------- /tools/scripts/visual_dialog/extract_vocabulary.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
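# Builds a Visual Dialog vocabulary by reusing the GQA ExtractVocabulary base class: collects questions, answers, and dialog captions from the visdial JSON files.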
2 | import json 3 | 4 | from tools.scripts.gqa.extract_vocabulary import ExtractVocabulary 5 | 6 | 7 | class ExtractVisdialVocabulary(ExtractVocabulary): 8 | def __init__(self): 9 | super().__init__() 10 | 11 | def get_text(self): 12 | text = [] 13 | 14 | for input_file in self.input_files: 15 | with open(input_file) as f: 16 | f_json = json.load(f) 17 | # Add 'questions' from visdial 18 | text += f_json["data"]["questions"] 19 | # Add 'answers' from visdial 20 | text += f_json["data"]["answers"] 21 | 22 | for dialog in f_json["data"]["dialogs"]: 23 | text += [dialog["caption"]] 24 | return text 25 | 26 | 27 | if __name__ == "__main__": 28 | extractor = ExtractVisdialVocabulary() 29 | extractor.extract() 30 | -------------------------------------------------------------------------------- /tools/sweeps/README.md: -------------------------------------------------------------------------------- 1 | # Sweep Scripts 2 | 3 | See [https://mmf.sh/docs/tutorials/slurm](https://mmf.sh/docs/tutorials/slurm) for a tutorial on how to use these scripts. 4 | -------------------------------------------------------------------------------- /website/.eslintignore: -------------------------------------------------------------------------------- 1 | .docusaurus 2 | static/api 3 | build/ 4 | -------------------------------------------------------------------------------- /website/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | 22 | # ESLint 23 | .eslintcache 24 | 25 | # Static Docs 26 | static/api 27 | -------------------------------------------------------------------------------- /website/.prettierignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build 3 | .docusaurus 4 | static/api 5 | -------------------------------------------------------------------------------- /website/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "arrowParens": "always", 3 | "bracketSpacing": false, 4 | "jsxBracketSameLine": true, 5 | "printWidth": 80, 6 | "proseWrap": "never", 7 | "singleQuote": true, 8 | "trailingComma": "all" 9 | } 10 | -------------------------------------------------------------------------------- /website/.stylelintrc.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | module.exports = { 9 | plugins: ['stylelint-copyright'], 10 | rules: { 11 | 'docusaurus/copyright-header': true, 12 | }, 13 | }; 14 | -------------------------------------------------------------------------------- /website/docs/getting_started/faqs.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: faqs 3 | title: Frequently Asked Questions (FAQ) 4 | sidebar_label: FAQs 5 | --- 6 | ## Coming Soon! 
7 | -------------------------------------------------------------------------------- /website/docs/getting_started/video_overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: video_overview 3 | title: Video overview 4 | sidebar_label: Video overview 5 | --- 6 | 7 | <!-- embedded video iframe: markup stripped from this dump -->
10 | -------------------------------------------------------------------------------- /website/src/pages/api_redirect/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | * 7 | * @format 8 | */ 9 | import React from 'react'; 10 | import BrowserOnly from '@docusaurus/BrowserOnly'; 11 | import {useHistory} from 'react-router-dom'; 12 | 13 | const API = () => { 14 | const history = useHistory(); 15 | history.push('/'); 16 | return ( 17 | <BrowserOnly fallback={<div>Some Fallback Content</div>}> 18 | {() => { 19 | window.location.href = '/api'; 20 | }} 21 | </BrowserOnly>
22 | ); 23 | }; 24 | 25 | export default API; 26 | -------------------------------------------------------------------------------- /website/static/.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # This config file will prevent tests from being run on the gh-pages branch. 2 | version: 2 3 | jobs: 4 | build: 5 | machine: true 6 | branches: 7 | ignore: gh-pages 8 | steps: 9 | - run: echo "Skipping tests on gh-pages branch" 10 | -------------------------------------------------------------------------------- /website/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/.nojekyll -------------------------------------------------------------------------------- /website/static/CNAME: -------------------------------------------------------------------------------- 1 | mmf.sh 2 | -------------------------------------------------------------------------------- /website/static/img/boilerplate.svg: -------------------------------------------------------------------------------- <!-- SVG markup stripped from this dump --> -------------------------------------------------------------------------------- /website/static/img/boilerplate_white.svg: -------------------------------------------------------------------------------- <!-- SVG markup stripped from this dump --> -------------------------------------------------------------------------------- /website/static/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/favicon.png -------------------------------------------------------------------------------- /website/static/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/logo.png -------------------------------------------------------------------------------- /website/static/img/logo_white_f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/logo_white_f.png -------------------------------------------------------------------------------- /website/static/img/oss_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/mmf/4197e59e85e1ea5e01b6d307762f7e993421e876/website/static/img/oss_logo.png -------------------------------------------------------------------------------- /website/static/img/pytorch_logo.svg: -------------------------------------------------------------------------------- <!-- SVG markup stripped from this dump; only the title "pytorch_logo" survives --> -------------------------------------------------------------------------------- /website/static/img/pytorch_logo_white.svg: -------------------------------------------------------------------------------- <!-- SVG markup stripped from this dump; only the title "pytorch_logo" survives --> --------------------------------------------------------------------------------