├── .gitignore ├── LEGAL.md ├── LICENSE.txt ├── README.md ├── README_EN.md ├── antmmf ├── .DS_Store ├── __init__.py ├── common │ ├── __init__.py │ ├── batch_collator.py │ ├── build.py │ ├── checkpoint.py │ ├── configurable.py │ ├── configuration.py │ ├── constants.py │ ├── data_updater.py │ ├── defaults │ │ └── configs │ │ │ └── base.yml │ ├── meter.py │ ├── metrics_reporter.py │ ├── registry.py │ ├── report.py │ ├── task_loader.py │ └── test_reporter.py ├── datasets │ ├── .DS_Store │ ├── __init__.py │ ├── base_dataset.py │ ├── base_dataset_builder.py │ ├── build.py │ ├── concat_dataset.py │ ├── data_module.py │ ├── database │ │ ├── __init__.py │ │ ├── annotated.py │ │ ├── features_database.py │ │ ├── graph_database.py │ │ ├── image_database.py │ │ └── video_database.py │ ├── features │ │ ├── __init__.py │ │ ├── feature_readers.py │ │ └── vision │ │ │ ├── __init__.py │ │ │ ├── base_extractor.py │ │ │ ├── detectron_feature.py │ │ │ ├── feature_saver.py │ │ │ ├── imagenet_feature.py │ │ │ └── video_feature.py │ ├── mm_dataset.py │ ├── multi_dataloader.py │ ├── processors │ │ ├── __init__.py │ │ ├── image_processors.py │ │ ├── mm_processors.py │ │ ├── processors.py │ │ ├── text_processors.py │ │ ├── transforms │ │ │ ├── __init__.py │ │ │ └── detection.py │ │ └── video_processors.py │ ├── samplers.py │ └── utils.py ├── models │ ├── __init__.py │ ├── ant_mmf.py │ ├── base_adversarial.py │ ├── base_model.py │ ├── bert.py │ ├── build.py │ ├── cnn.py │ ├── cnn_lstm.py │ ├── comp_gcn.py │ ├── concat_bert.py │ ├── image_classification.py │ ├── layoutlm.py │ ├── mm_adversarial.py │ ├── mmbt.py │ ├── multitask_model.py │ ├── nlp_adversarial.py │ ├── s3dg.py │ ├── spkResNet.py │ ├── top_down_bottom_up.py │ ├── vilbert.py │ └── visual_bert.py ├── modules │ ├── .DS_Store │ ├── __init__.py │ ├── approx_compute.py │ ├── attention.py │ ├── build.py │ ├── classifier │ │ ├── __init__.py │ │ ├── bert_classifier_head.py │ │ ├── classifier_layer.py │ │ ├── logit_classifier.py │ │ ├── transformer_decoder.py │ │ └── weight_norm_classifier.py │ ├── decoders │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── graph │ │ │ ├── __init__.py │ │ │ ├── classify_decoder.py │ │ │ ├── delta_kg_classify_decoder.py │ │ │ ├── delta_kg_decoder.py │ │ │ ├── ffn_decoder.py │ │ │ └── graph_decoder.py │ │ ├── hierarchical_classifier.py │ │ ├── language_decoder.py │ │ └── transformer_decoder_model.py │ ├── embeddings │ │ ├── __init__.py │ │ ├── antmmf_embeddings.py │ │ ├── bert_vision_linguistic_embeddings.py │ │ ├── clip_visual_embedding.py │ │ ├── detr_position_embedding_learned.py │ │ ├── detr_position_embedding_sine.py │ │ ├── image_bert_embeddings.py │ │ ├── image_embedding.py │ │ ├── layout_lm_embeddings.py │ │ ├── text_embedding.py │ │ ├── univl_layout_embedding.py │ │ └── visual_layout_embeddings.py │ ├── encoders │ │ ├── __init__.py │ │ ├── graph │ │ │ ├── __init__.py │ │ │ ├── continuous_time_encoder.py │ │ │ ├── delta_kg_encoder.py │ │ │ ├── gat.py │ │ │ ├── gat_adj_matrix.py │ │ │ ├── graph_encoder.py │ │ │ └── naive_attention_based_encoder.py │ │ ├── image_feature_encoder.py │ │ ├── multimodal_bert_clf.py │ │ ├── multimodal_bert_for_pretraining.py │ │ ├── multimodal_encoder.py │ │ ├── text_encoder.py │ │ ├── utils.py │ │ └── visual_encoder.py │ ├── functional │ │ ├── __init__.py │ │ ├── set_criterion.py │ │ └── swish.py │ ├── fusions.py │ ├── graph.py │ ├── interpret │ │ ├── __init__.py │ │ ├── integrated_gradient.py │ │ ├── saliency_interpreter.py │ │ └── simple_gradient.py │ ├── layers │ │ ├── __init__.py │ │ ├── conditional_layer_norm.py │ │ ├── consensus_module.py │ │ ├── conv_net.py │ │ ├── crf.py │ │ ├── exu.py │ │ ├── feats_joint.py │ │ ├── frozen_batchnorm.py │ │ ├── gated_tanh.py │ │ ├── linear.py │ │ ├── mb_conv_block.py │ │ ├── mlp.py │ │ ├── mlp_attention.py │ │ ├── modal_combine_layer.py │ │ ├── padding.py │ │ ├── swish.py │ │ ├── transform_layer.py │ │ └── vae.py │ ├── losses │ │ ├── __init__.py │ │ ├── asymmetric_loss_optimized.py │ │ ├── attention_supervision_loss.py │ │ ├── binary_cross_entropy.py │ │ ├── binary_cross_entropy_with_label_smoothing.py │ │ ├── binary_cross_entropy_with_logits.py │ │ ├── caption_cross_entropy.py │ │ ├── combined_loss.py │ │ ├── cos_ams_softmax_loss.py │ │ ├── cos_arc_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── eet_loss.py │ │ ├── hierarchical_multilabel_loss.py │ │ ├── hierarchical_softmax_loss.py │ │ ├── info_nce_loss.py │ │ ├── kg_margin_contrastive_loss.py │ │ ├── knowledge_distill_loss.py │ │ ├── label_smoothing_cross_entropy.py │ │ ├── losses.py │ │ ├── m4c_decoding_bce_with_mask_loss.py │ │ ├── mil_margin_contrastive_loss.py │ │ ├── mil_nce_loss.py │ │ ├── mse_loss.py │ │ ├── multi_label_category_cross_entropy_loss.py │ │ ├── multi_loss.py │ │ ├── nce_loss.py │ │ ├── nll_loss.py │ │ ├── ordinal_loss.py │ │ ├── pairwise_loss.py │ │ ├── softmax_focal_loss.py │ │ ├── softmax_kl_div_loss.py │ │ ├── weighted_softmax_loss.py │ │ └── wrong_loss.py │ ├── matcher.py │ ├── message_passing │ │ ├── __init__.py │ │ ├── delta_conv.py │ │ ├── message_passing.py │ │ ├── qkv_attention_graph_conv.py │ │ └── relation_wise_norm_conv.py │ ├── metrics │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── asm.py │ │ ├── base_metric.py │ │ ├── bleu4.py │ │ ├── caption_bleu4.py │ │ ├── evaluators │ │ │ ├── __init__.py │ │ │ └── coco_eval.py │ │ ├── f1.py │ │ ├── global_retrieval_recall.py │ │ ├── hier_label_accuracy.py │ │ ├── hier_multilabel_f1.py │ │ ├── ks.py │ │ ├── map.py │ │ ├── mce_accuracy.py │ │ ├── mean_rank.py │ │ ├── mean_reciprocal_rank.py │ │ ├── metrics.py │ │ ├── mm_retrieval_recall.py │ │ ├── multi_accuracy.py │ │ ├── multi_macro_f1.py │ │ ├── rank_and_hits.py │ │ ├── recall_at_k.py │ │ ├── rmce_accuracy.py │ │ ├── roc_auc.py │ │ ├── rouge_antmmf.py │ │ ├── span_f1.py │ │ └── utils.py │ ├── module_registry.py │ ├── transformers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── detr.py │ │ │ ├── itm.py │ │ │ ├── mlm.py │ │ │ └── mrc.py │ │ └── position_enhance.py │ ├── utils.py │ └── vision │ │ ├── __init__.py │ │ ├── backbone │ │ ├── __init__.py │ │ ├── cctt.py │ │ ├── clip │ │ │ ├── __init__.py │ │ │ ├── cn_model.py │ │ │ ├── cn_tokenizer.py │ │ │ ├── configuration_bert.py │ │ │ ├── model.py │ │ │ ├── modeling_bert.py │ │ │ ├── simple_tokenizer.py │ │ │ └── vocab.txt │ │ ├── efficientnet.py │ │ ├── pvt.py │ │ └── video_swin.py │ │ ├── necks │ │ └── BackboneWithFPN.py │ │ ├── non_local.py │ │ └── temporal_shift.py ├── optimizer │ ├── __init__.py │ ├── adan.py │ ├── adv_free_lb.py │ ├── basic_optimizers.py │ ├── build.py │ └── combine_optimizers.py ├── predictors │ ├── __init__.py │ ├── base_predictor.py │ ├── batch_predictor.py │ ├── build.py │ ├── mmbt_predictor.py │ └── multitask_predictor.py ├── run.py ├── scripts │ ├── extract_vocabulary.py │ ├── features │ │ ├── extract_features.md │ │ ├── extract_features.py │ │ ├── extract_features_vmb.py │ │ ├── extract_resnet152_feat.py │ │ ├── extract_resnet_features.py │ │ └── lmdb_conversion.py │ └── hm_convert.py ├── structures │ ├── __init__.py │ ├── base.py │ ├── boxes.py │ ├── images.py │ ├── nested_tensor.py │ ├── sample.py │ └── utils.py ├── tasks │ ├── __init__.py │ └── base_task.py ├── trainers │ ├── __init__.py │ ├── adv_trainer.py │ ├── base_trainer.py │ ├── build.py │ ├── distill_trainer.py │ ├── remote_trainer.py │ └── retrieval_trainer.py └── utils │ ├── .DS_Store │ ├── __init__.py │ ├── dataset_utils.py │ ├── distributed.py │ ├── distributed_utils.py │ ├── download.py │ ├── early_stopping.py │ ├── env.py │ ├── file_io.py │ ├── flags.py │ ├── general.py │ ├── glob.py │ ├── image_ops.py │ ├── init.py │ ├── inspector.py │ ├── launch.py │ ├── logger.py │ ├── optim_utils.py │ ├── phoc │ ├── __init__.py │ ├── build_phoc.py │ └── src │ │ ├── cphoc.c │ │ └── src_note.md │ ├── register_fp32.py │ ├── scatter.py │ ├── tensor_utils.py │ ├── text_utils.py │ ├── timer.py │ ├── video_utils.py │ ├── visual_utils │ ├── .DS_Store │ ├── __init__.py │ ├── palette.py │ ├── vis_utils.py │ └── visualization_utils.py │ ├── visualize.py │ └── vocab.py ├── prj ├── EVE │ └── README.md ├── M2_Encoder │ ├── README.md │ ├── __init__.py │ ├── configs │ │ ├── Encoder_0.4B.json │ │ ├── Encoder_10B.json │ │ └── Encoder_1B.json │ ├── data │ │ ├── coco-cn_test.jsonl │ │ ├── coco_caption_karpathy_test.jsonl │ │ ├── f30k-cn_test.jsonl │ │ └── f30k_caption_karpathy_test.jsonl │ ├── eval_retrieval.py │ ├── m2_encoder.py │ ├── ms_wrapper.py │ ├── pics │ │ ├── cn_imagenet_cls.jpg │ │ ├── cn_retrieval.jpg │ │ ├── effect.png │ │ ├── en_imagenet_cls.jpg │ │ ├── en_retrieval.jpg │ │ ├── fine-grained.jpg │ │ └── pokemon.jpeg │ ├── requirements.txt │ ├── run.py │ └── vlmo │ │ ├── __init__.py │ │ ├── config.py │ │ ├── modules │ │ ├── __init__.py │ │ ├── heads.py │ │ ├── modeling_utils.py │ │ ├── multiway_transformer.py │ │ ├── objectives.py │ │ ├── vlmo_module.py │ │ └── vlmo_utils.py │ │ ├── tokenizer │ │ ├── __init__.py │ │ ├── sp.model │ │ ├── tokenization_glm.py │ │ └── tokenizer_config.json │ │ ├── torchscale │ │ ├── __init__.py │ │ ├── architecture │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── decoder.py │ │ │ ├── encoder.py │ │ │ ├── encoder_decoder.py │ │ │ └── utils.py │ │ ├── component │ │ │ ├── __init__.py │ │ │ ├── droppath.py │ │ │ ├── embedding.py │ │ │ ├── feedforward_network.py │ │ │ ├── multihead_attention.py │ │ │ ├── multiway_network.py │ │ │ ├── relative_position_bias.py │ │ │ ├── xmoe │ │ │ │ ├── __init__.py │ │ │ │ ├── moe_layer.py │ │ │ │ └── routing.py │ │ │ └── xpos_relative_position.py │ │ └── model │ │ │ ├── BEiT3.py │ │ │ └── __init__.py │ │ ├── transforms │ │ ├── __init__.py │ │ ├── pixelbert.py │ │ ├── randaug.py │ │ ├── randaugment.py │ │ ├── square_transform.py │ │ └── utils.py │ │ └── utils │ │ ├── __init__.py │ │ ├── beit_utils.py │ │ └── patch_utils.py ├── M2_RAAP │ └── README.md ├── M2_omni │ ├── README.md │ ├── data │ │ ├── audioqa.wav │ │ ├── m2-omni.png │ │ ├── plant.png │ │ └── video1.mp4 │ ├── models │ │ ├── __init__.py │ │ ├── configuration_llama_3d.py │ │ ├── configuration_m2omni.py │ │ ├── configuration_qwen2_vit.py │ │ ├── feature_extraction_sanm.py │ │ ├── image_processing_m2omni.py │ │ ├── m2omni_utils.py │ │ ├── modeling_llama_3d.py │ │ ├── modeling_m2omni.py │ │ ├── processing_m2omni.py │ │ ├── qwen2_vit.py │ │ └── sanm_audio.py │ └── requirements.txt ├── Pink │ ├── 47.png │ ├── README.md │ ├── dataset_generation │ │ ├── coco_detection.py │ │ ├── object365_detection.py │ │ ├── pointing_vqa_local.py │ │ ├── pointing_vqa_look_twice.py │ │ ├── v7w_pointing.py │ │ ├── v7w_telling.py │ │ └── visual_genome2vg.py │ ├── demo.py │ ├── image.png │ ├── inference.ipynb │ ├── nash_high.jpeg │ ├── pink │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── conversation.py │ │ ├── datasets │ │ │ ├── AOKVQA.py │ │ │ ├── BaseDataset.py │ │ │ ├── COCOCaption.py │ │ │ ├── FlickrCaption.py │ │ │ ├── FlickrEntity.py │ │ │ ├── GQA.py │ │ │ ├── LLaVA.py │ │ │ ├── LLaVACaption.py │ │ │ ├── OKVQA.py │ │ │ ├── Object365.py │ │ │ ├── PointingVQALocal.py │ │ │ ├── PointingVQALookTwice.py │ │ │ ├── PretrainCaption.py │ │ │ ├── ProbMergeDataset.py │ │ │ ├── Templates.py │ │ │ ├── V7WGrounding.py │ │ │ ├── VQAv2.py │ │ │ ├── VSR.py │ │ │ ├── VisualGenome.py │ │ │ ├── VisualGrounding.py │ │ │ └── __init__.py │ │ ├── eval │ │ │ ├── eval_gqa.py │ │ │ ├── eval_pointingvqa_local.py │ │ │ ├── eval_pointingvqa_looktwice.py │ │ │ ├── eval_v7wgrounding.py │ │ │ ├── eval_vg.py │ │ │ ├── eval_vqav2.py │ │ │ ├── model_gqa.py │ │ │ ├── model_iconqa.py │ │ │ ├── model_object365.py │ │ │ ├── model_okvqa.py │ │ │ ├── model_pointingvqa_local.py │ │ │ ├── model_pointingvqa_looktwice.py │ │ │ ├── model_seed.py │ │ │ ├── model_v7wgrounding.py │ │ │ ├── model_vg_base_batch.py │ │ │ ├── model_vqav2.py │ │ │ ├── model_vsr.py │ │ │ ├── object365_filter.py │ │ │ └── vqa_tools │ │ │ │ ├── vqa.py │ │ │ │ ├── vqa_eval.py │ │ │ │ └── vqa_result.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── adapter.py │ │ │ ├── eva_vit.py │ │ │ ├── pink.py │ │ │ └── utils.py │ │ └── train │ │ │ ├── pink_trainer.py │ │ │ └── train.py │ ├── pyproject.toml │ └── scripts │ │ ├── eval_refcoco.sh │ │ ├── object365_generate.sh │ │ ├── stage1.sh │ │ ├── stage2.sh │ │ └── stage2_with_object365.sh ├── README.md ├── base_vtp │ ├── README.md │ ├── README_base_vtp_en.md │ ├── configs │ │ ├── roi_modelling │ │ │ └── roi_model_pretrain.yml │ │ └── univl │ │ │ ├── image │ │ │ └── pretrain │ │ │ │ └── univl_pretrain.yml │ │ │ └── video │ │ │ ├── finetune_classification │ │ │ ├── univl_classification_for_action_recognition_ucf101.local.yml │ │ │ ├── univl_classification_for_action_recognition_ucf101.yml │ │ │ ├── univl_classification_for_msrvtt_qa.yml │ │ │ ├── univl_classification_for_msrvtt_qa_videoswin.yml │ │ │ └── univl_video_text_classification.yml │ │ │ ├── finetune_multi_choice_qa │ │ │ ├── base.yml │ │ │ ├── msr_vtt_mc_qa_pvt.local.yml │ │ │ ├── msr_vtt_mc_qa_pvt.yml │ │ │ └── msr_vtt_mc_qa_videoswin.yml │ │ │ ├── finetune_retrieval │ │ │ ├── CN_vatex_pvt.yml │ │ │ ├── CN_vatex_pvt_local.yml │ │ │ ├── CN_vatex_videoswin.yml │ │ │ ├── base.yml │ │ │ ├── didemo_pvt.yml │ │ │ ├── msr_vtt.yml │ │ │ ├── msr_vtt_pvt.local.yml │ │ │ ├── msr_vtt_pvt.yml │ │ │ └── msr_vtt_videoswin.yml │ │ │ ├── pretrain │ │ │ ├── CN_video_videoswin.yml │ │ │ ├── base.yml │ │ │ ├── chinese.yml │ │ │ ├── coco_vg.local.yml │ │ │ ├── coco_vg.yml │ │ │ ├── coco_vg_pvt.yml │ │ │ ├── coco_vg_videoswin.yml │ │ │ ├── howto100m_coco_vg_rnd_asr.yml │ │ │ ├── video_swin.yml │ │ │ └── webvid_videoswin.yml │ │ │ └── visual_encoder │ │ │ ├── pvt.yml │ │ │ ├── resnet.yml │ │ │ └── video_swin.yml │ ├── roi_univl │ │ ├── __init__.py │ │ ├── roi │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── model.py │ │ │ ├── region_processor.py │ │ │ └── task.py │ │ └── univl │ │ │ ├── __init__.py │ │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── clip_text_encoder.py │ │ │ ├── clip_visual_encoder.py │ │ │ ├── moco_utils.py │ │ │ ├── univl_base.py │ │ │ ├── univl_classification.py │ │ │ ├── univl_model.py │ │ │ ├── univl_pretrain.py │ │ │ ├── univl_video_base.py │ │ │ ├── univl_video_cls.py │ │ │ ├── univl_video_multi_choice_qa.py │ │ │ ├── univl_video_pretrain.py │ │ │ └── univl_video_ret.py │ │ │ ├── pretrain_img_text │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ │ ├── pretrain_video_text │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ │ ├── processors.py │ │ │ ├── task.py │ │ │ └── video_text │ │ │ ├── __init__.py │ │ │ ├── cls_builder.py │ │ │ ├── cls_dataset.py │ │ │ ├── mc_qa_builder.py │ │ │ ├── mc_qa_dataset.py │ │ │ ├── ret_builder.py │ │ │ └── ret_dataset.py │ ├── run.py │ └── scripts │ │ ├── finetune │ │ ├── mcvqa_msr_vtt_mc_qa_videoswin.sh │ │ ├── ret_msr_vtt_videoswin.sh │ │ └── vqa_msrvtt_qa_videoswin.sh │ │ ├── local_test │ │ ├── coco_vg.local.sh │ │ └── msr_vtt_pvt.local.sh │ │ └── pretrain │ │ ├── coco_vg_videoswin.sh │ │ └── webvid_videoswin.sh ├── cnvid_vtp │ ├── CODEBASE_CN.md │ ├── CODEBASE_EN.md │ ├── DATASET.md │ ├── LEGAL.md │ ├── LICENSE │ ├── README.md │ ├── TERMS.md │ ├── configs │ │ ├── roi_modelling │ │ │ └── roi_model_pretrain.yml │ │ └── univl │ │ │ ├── image │ │ │ └── pretrain │ │ │ │ └── univl_pretrain.yml │ │ │ └── video │ │ │ ├── finetune_retrieval │ │ │ ├── CN_didemo_videoswin.yml │ │ │ ├── CN_msrvtt_videoswin.yml │ │ │ ├── CN_vatex_videoswin.yml │ │ │ ├── EN_didemo_videoswin.yml │ │ │ ├── EN_msr_vtt_videoswin.yml │ │ │ ├── EN_vatex_videoswin.yml │ │ │ └── base.yml │ │ │ ├── pretrain │ │ │ ├── CN_video_videoswin.yml │ │ │ ├── EN_coco_vg_cc_videoswin.yml │ │ │ ├── EN_webvid_videoswin.yml │ │ │ ├── base.yml │ │ │ ├── chinese.yml │ │ │ ├── howto100m_coco_vg_rnd_asr.yml │ │ │ ├── quick_test.yml │ │ │ └── video_swin.yml │ │ │ └── visual_encoder │ │ │ └── video_swin.yml │ ├── demo_figs │ │ ├── adj_count_t50.jpg │ │ ├── experiment_result.jpg │ │ ├── keyword_cloud_t200.jpg │ │ ├── motivation.jpg │ │ ├── noun_count_t50.jpg │ │ ├── teaser_figure.jpg │ │ ├── time_count.jpg │ │ ├── topic_cloud_t200.jpg │ │ ├── verb_count_t50.jpg │ │ └── video_example.jpg │ ├── download_cnvid │ │ └── download_cnvid_video.py │ ├── requirements.txt │ ├── roi_univl │ │ ├── __init__.py │ │ ├── roi │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── model.py │ │ │ ├── region_processor.py │ │ │ └── task.py │ │ └── univl │ │ │ ├── __init__.py │ │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── moco_utils.py │ │ │ ├── univl_base.py │ │ │ ├── univl_model.py │ │ │ ├── univl_pretrain.py │ │ │ ├── univl_video_base.py │ │ │ ├── univl_video_pretrain.py │ │ │ └── univl_video_ret.py │ │ │ ├── pretrain_video_text │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ │ ├── processors.py │ │ │ ├── task.py │ │ │ └── video_text │ │ │ ├── __init__.py │ │ │ ├── ret_builder.py │ │ │ └── ret_dataset.py │ ├── run.py │ └── scripts │ │ ├── finetune │ │ ├── CN_ret_didemo_videoswin.sh │ │ ├── CN_ret_msr_vtt_videoswin.sh │ │ ├── CN_ret_vatex_videoswin.sh │ │ ├── EN_ret_didemo_videoswin.sh │ │ ├── EN_ret_msr_vtt_videoswin.sh │ │ └── EN_ret_vatex_videoswin.sh │ │ ├── local_test │ │ └── coco_vg.local.sh │ │ └── pretrain │ │ ├── CN_cnvid_pt_videoswin.sh │ │ ├── EN_coco_vg_cc_pt_videoswin.sh │ │ └── EN_webvid_pt_videoswin.sh ├── dmae_vtp │ ├── README.md │ ├── configs │ │ └── univl │ │ │ └── video │ │ │ ├── finetune_retrieval │ │ │ ├── CN_vatex_pvt.yml │ │ │ ├── CN_vatex_pvt_local.yml │ │ │ ├── CN_vatex_videoswin.yml │ │ │ ├── base.yml │ │ │ ├── didemo_pvt.yml │ │ │ ├── msr_vtt.yml │ │ │ ├── msr_vtt_pvt.local.yml │ │ │ ├── msr_vtt_pvt.yml │ │ │ └── msr_vtt_videoswin.yml │ │ │ └── visual_encoder │ │ │ ├── pvt.yml │ │ │ ├── resnet.yml │ │ │ └── video_swin.yml │ ├── demo_figs │ │ └── simple_framework.png │ ├── roi_univl │ │ ├── __init__.py │ │ ├── roi │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── dataset.py │ │ │ ├── model.py │ │ │ ├── region_processor.py │ │ │ └── task.py │ │ └── univl │ │ │ ├── __init__.py │ │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── clip_text_encoder.py │ │ │ ├── clip_visual_encoder.py │ │ │ ├── dmae_utils.py │ │ │ ├── moco_utils.py │ │ │ ├── tpmcl_utils.py │ │ │ ├── univl_base.py │ │ │ ├── univl_classification.py │ │ │ ├── univl_model.py │ │ │ ├── univl_pretrain.py │ │ │ ├── univl_video_base.py │ │ │ ├── univl_video_cls.py │ │ │ ├── univl_video_multi_choice_qa.py │ │ │ ├── univl_video_pretrain.py │ │ │ └── univl_video_ret.py │ │ │ ├── pretrain_img_text │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ │ ├── pretrain_video_text │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── dataset.py │ │ │ ├── processors.py │ │ │ ├── task.py │ │ │ └── video_text │ │ │ ├── __init__.py │ │ │ ├── cls_builder.py │ │ │ ├── cls_dataset.py │ │ │ ├── mc_qa_builder.py │ │ │ ├── mc_qa_dataset.py │ │ │ ├── ret_builder.py │ │ │ └── ret_dataset.py │ ├── run.py │ └── scripts │ │ └── local_test │ │ ├── msr_vtt_pvt.eval.sh │ │ ├── msr_vtt_pvt.local.sh │ │ └── msr_vtt_pvt.train.sh └── snps3_vtp │ ├── CODEBASE_cn.md │ ├── CODEBASE_en.md │ ├── README.md │ ├── auxiliary_files │ ├── count_reprocess_webvid.json │ ├── generate_ss_word_json │ │ ├── 1st_ss_word_mining.py │ │ ├── 2nd_get_what_word_is_important.py │ │ └── 3rd_get_json_csv_reprocess.py │ └── vocab_simi_list_new.json │ ├── configs │ ├── roi_modelling │ │ └── roi_model_pretrain.yml │ └── univl │ │ ├── image │ │ └── pretrain │ │ │ └── univl_pretrain.yml │ │ └── video │ │ ├── finetune_classification │ │ ├── msr_vtt_qa_pvt.yml │ │ ├── msr_vtt_qa_videoswin.yml │ │ ├── msvd_qa_pvt.yml │ │ ├── msvd_qa_videoswin.yml │ │ └── univl_video_text_classification.yml │ │ ├── finetune_multi_choice_qa │ │ ├── base.yml │ │ ├── msr_vtt_mc_qa_pvt.yml │ │ └── msr_vtt_mc_qa_videoswin.yml │ │ ├── finetune_retrieval │ │ ├── base.yml │ │ ├── didemo_pvt.yml │ │ ├── didemo_videoswin.yml │ │ ├── msr_vtt_pvt.yml │ │ ├── msr_vtt_videoswin.yml │ │ ├── msvd_pvt.yml │ │ └── msvd_videoswin.yml │ │ ├── pretrain │ │ ├── base.yml │ │ ├── cc_videoswin.yml │ │ ├── coco_vg_pvt.yml │ │ ├── coco_vg_resnet.yml │ │ ├── howto100m_coco_vg_rnd_asr.yml │ │ ├── quick_test.yml │ │ ├── video_swin.yml │ │ └── webvid_videoswin.yml │ │ └── visual_encoder │ │ ├── pvt.yml │ │ ├── resnet.yml │ │ └── video_swin.yml │ ├── roi_univl │ ├── __init__.py │ ├── roi │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── dataset.py │ │ ├── model.py │ │ ├── region_processor.py │ │ └── task.py │ └── univl │ │ ├── __init__.py │ │ ├── model │ │ ├── __init__.py │ │ ├── clip_text_encoder.py │ │ ├── clip_visual_encoder.py │ │ ├── moco_utils.py │ │ ├── univl_base.py │ │ ├── univl_classification.py │ │ ├── univl_model.py │ │ ├── univl_pretrain.py │ │ ├── univl_video_base.py │ │ ├── univl_video_cls.py │ │ ├── univl_video_multi_choice_qa.py │ │ ├── univl_video_pretrain.py │ │ └── univl_video_ret.py │ │ ├── pretrain_img_text │ │ ├── __init__.py │ │ ├── builder.py │ │ └── dataset.py │ │ ├── pretrain_video_text │ │ ├── __init__.py │ │ ├── builder.py │ │ └── dataset.py │ │ ├── processors.py │ │ ├── task.py │ │ └── video_text │ │ ├── __init__.py │ │ ├── cls_builder.py │ │ ├── cls_dataset.py │ │ ├── mc_qa_builder.py │ │ ├── mc_qa_dataset.py │ │ ├── ret_builder.py │ │ └── ret_dataset.py │ ├── run.py │ ├── scripts │ ├── finetune │ │ ├── multi_choice_qa │ │ │ ├── msr_vtt_mc_qa_pvt.sh │ │ │ └── msr_vtt_mc_qa_videoswin.sh │ │ ├── text2video_retrieval │ │ │ ├── didemo_ret_pvt.sh │ │ │ ├── didemo_ret_videoswin.sh │ │ │ ├── msr_vtt_ret_pvt.sh │ │ │ ├── msr_vtt_ret_videoswin.sh │ │ │ ├── msvd_ret_pvt.sh │ │ │ └── msvd_ret_videoswin.sh │ │ └── video_qa │ │ │ ├── msrvtt_qa_pvt.sh │ │ │ ├── msrvtt_qa_videoswin.sh │ │ │ ├── msvd_qa_pvt.sh │ │ │ └── msvd_qa_videoswin.sh │ ├── local_test │ │ └── snps3_quick_test.sh │ └── pretrain │ │ ├── cc_videoswin.sh │ │ ├── coco_vg_pvt.sh │ │ ├── coco_vg_resnet.sh │ │ └── webvid_videoswin.sh │ └── z_figs │ ├── ablation.jpg │ └── performance.jpg ├── requirements.txt └── tests ├── .DS_Store └── data ├── image ├── .DS_Store └── dog.jpg ├── video ├── VATEX_CN.jsonl ├── data │ ├── asr_files │ │ ├── video9770 │ │ └── video9771 │ └── mp4 │ │ ├── video9770.mp4 │ │ └── video9771.mp4 ├── msrvtt_multi_choice_qa.jsonl ├── msrvtt_test.jsonl ├── msrvtt_train.jsonl ├── ucf101_sample.jsonl ├── univl_img.jsonl └── univl_video.jsonl ├── vocab.txt └── vocabs ├── bert-base-chinese_21128_vocab.txt └── bert-base-uncased_30522_vocab.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | .DS_Store -------------------------------------------------------------------------------- /LEGAL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/LEGAL.md -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/README.md -------------------------------------------------------------------------------- /README_EN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/README_EN.md -------------------------------------------------------------------------------- /antmmf/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/.DS_Store -------------------------------------------------------------------------------- /antmmf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/__init__.py -------------------------------------------------------------------------------- /antmmf/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/__init__.py -------------------------------------------------------------------------------- /antmmf/common/batch_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/batch_collator.py -------------------------------------------------------------------------------- /antmmf/common/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/build.py -------------------------------------------------------------------------------- /antmmf/common/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/checkpoint.py -------------------------------------------------------------------------------- /antmmf/common/configurable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/configurable.py -------------------------------------------------------------------------------- /antmmf/common/configuration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/configuration.py -------------------------------------------------------------------------------- /antmmf/common/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/constants.py -------------------------------------------------------------------------------- /antmmf/common/data_updater.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/data_updater.py -------------------------------------------------------------------------------- /antmmf/common/defaults/configs/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/defaults/configs/base.yml -------------------------------------------------------------------------------- /antmmf/common/meter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/meter.py -------------------------------------------------------------------------------- /antmmf/common/metrics_reporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/metrics_reporter.py -------------------------------------------------------------------------------- /antmmf/common/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/registry.py -------------------------------------------------------------------------------- /antmmf/common/report.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/report.py -------------------------------------------------------------------------------- /antmmf/common/task_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/task_loader.py -------------------------------------------------------------------------------- /antmmf/common/test_reporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/common/test_reporter.py -------------------------------------------------------------------------------- /antmmf/datasets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/.DS_Store -------------------------------------------------------------------------------- /antmmf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/__init__.py -------------------------------------------------------------------------------- /antmmf/datasets/base_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/base_dataset.py -------------------------------------------------------------------------------- /antmmf/datasets/base_dataset_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/base_dataset_builder.py -------------------------------------------------------------------------------- /antmmf/datasets/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/build.py -------------------------------------------------------------------------------- /antmmf/datasets/concat_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/concat_dataset.py -------------------------------------------------------------------------------- /antmmf/datasets/data_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/data_module.py -------------------------------------------------------------------------------- /antmmf/datasets/database/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /antmmf/datasets/database/annotated.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/database/annotated.py -------------------------------------------------------------------------------- /antmmf/datasets/database/features_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/database/features_database.py -------------------------------------------------------------------------------- /antmmf/datasets/database/graph_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/database/graph_database.py -------------------------------------------------------------------------------- /antmmf/datasets/database/image_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/database/image_database.py -------------------------------------------------------------------------------- /antmmf/datasets/database/video_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/database/video_database.py -------------------------------------------------------------------------------- /antmmf/datasets/features/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /antmmf/datasets/features/feature_readers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/features/feature_readers.py -------------------------------------------------------------------------------- /antmmf/datasets/features/vision/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/features/vision/__init__.py -------------------------------------------------------------------------------- /antmmf/datasets/features/vision/base_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/features/vision/base_extractor.py -------------------------------------------------------------------------------- /antmmf/datasets/features/vision/detectron_feature.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/features/vision/detectron_feature.py -------------------------------------------------------------------------------- /antmmf/datasets/features/vision/feature_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/features/vision/feature_saver.py -------------------------------------------------------------------------------- /antmmf/datasets/features/vision/imagenet_feature.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/features/vision/imagenet_feature.py -------------------------------------------------------------------------------- /antmmf/datasets/features/vision/video_feature.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/features/vision/video_feature.py -------------------------------------------------------------------------------- /antmmf/datasets/mm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/mm_dataset.py -------------------------------------------------------------------------------- /antmmf/datasets/multi_dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/multi_dataloader.py -------------------------------------------------------------------------------- /antmmf/datasets/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/processors/__init__.py -------------------------------------------------------------------------------- /antmmf/datasets/processors/image_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/processors/image_processors.py -------------------------------------------------------------------------------- /antmmf/datasets/processors/mm_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/processors/mm_processors.py -------------------------------------------------------------------------------- /antmmf/datasets/processors/processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/processors/processors.py -------------------------------------------------------------------------------- /antmmf/datasets/processors/text_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/processors/text_processors.py -------------------------------------------------------------------------------- /antmmf/datasets/processors/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # Copyright (c) 2023 Ant Group and its affiliates. 3 | -------------------------------------------------------------------------------- /antmmf/datasets/processors/transforms/detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/processors/transforms/detection.py -------------------------------------------------------------------------------- /antmmf/datasets/processors/video_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/processors/video_processors.py -------------------------------------------------------------------------------- /antmmf/datasets/samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/samplers.py -------------------------------------------------------------------------------- /antmmf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/datasets/utils.py -------------------------------------------------------------------------------- /antmmf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/__init__.py -------------------------------------------------------------------------------- /antmmf/models/ant_mmf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/ant_mmf.py -------------------------------------------------------------------------------- /antmmf/models/base_adversarial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/base_adversarial.py -------------------------------------------------------------------------------- /antmmf/models/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/base_model.py -------------------------------------------------------------------------------- /antmmf/models/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/bert.py -------------------------------------------------------------------------------- /antmmf/models/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/build.py -------------------------------------------------------------------------------- /antmmf/models/cnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/cnn.py -------------------------------------------------------------------------------- /antmmf/models/cnn_lstm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/cnn_lstm.py -------------------------------------------------------------------------------- /antmmf/models/comp_gcn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/comp_gcn.py -------------------------------------------------------------------------------- /antmmf/models/concat_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/concat_bert.py -------------------------------------------------------------------------------- /antmmf/models/image_classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/image_classification.py -------------------------------------------------------------------------------- /antmmf/models/layoutlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/layoutlm.py -------------------------------------------------------------------------------- /antmmf/models/mm_adversarial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/mm_adversarial.py -------------------------------------------------------------------------------- /antmmf/models/mmbt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/mmbt.py -------------------------------------------------------------------------------- /antmmf/models/multitask_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/multitask_model.py -------------------------------------------------------------------------------- /antmmf/models/nlp_adversarial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/nlp_adversarial.py -------------------------------------------------------------------------------- /antmmf/models/s3dg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/s3dg.py -------------------------------------------------------------------------------- /antmmf/models/spkResNet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/spkResNet.py -------------------------------------------------------------------------------- /antmmf/models/top_down_bottom_up.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/top_down_bottom_up.py -------------------------------------------------------------------------------- /antmmf/models/vilbert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/vilbert.py -------------------------------------------------------------------------------- /antmmf/models/visual_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/models/visual_bert.py -------------------------------------------------------------------------------- /antmmf/modules/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/.DS_Store -------------------------------------------------------------------------------- /antmmf/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/approx_compute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/approx_compute.py -------------------------------------------------------------------------------- /antmmf/modules/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/attention.py -------------------------------------------------------------------------------- /antmmf/modules/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/build.py -------------------------------------------------------------------------------- /antmmf/modules/classifier/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/classifier/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/classifier/bert_classifier_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/classifier/bert_classifier_head.py -------------------------------------------------------------------------------- /antmmf/modules/classifier/classifier_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/classifier/classifier_layer.py -------------------------------------------------------------------------------- /antmmf/modules/classifier/logit_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/classifier/logit_classifier.py -------------------------------------------------------------------------------- /antmmf/modules/classifier/transformer_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/classifier/transformer_decoder.py -------------------------------------------------------------------------------- /antmmf/modules/classifier/weight_norm_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/classifier/weight_norm_classifier.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/decoder.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/graph/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/graph/classify_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/graph/classify_decoder.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/graph/delta_kg_classify_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/graph/delta_kg_classify_decoder.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/graph/delta_kg_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/graph/delta_kg_decoder.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/graph/ffn_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/graph/ffn_decoder.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/graph/graph_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/graph/graph_decoder.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/hierarchical_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/hierarchical_classifier.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/language_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/language_decoder.py -------------------------------------------------------------------------------- /antmmf/modules/decoders/transformer_decoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/decoders/transformer_decoder_model.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/antmmf_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/antmmf_embeddings.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/bert_vision_linguistic_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/bert_vision_linguistic_embeddings.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/clip_visual_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/clip_visual_embedding.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/detr_position_embedding_learned.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/detr_position_embedding_learned.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/detr_position_embedding_sine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/detr_position_embedding_sine.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/image_bert_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/image_bert_embeddings.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/image_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/image_embedding.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/layout_lm_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/layout_lm_embeddings.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/text_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/text_embedding.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/univl_layout_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/univl_layout_embedding.py -------------------------------------------------------------------------------- /antmmf/modules/embeddings/visual_layout_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/embeddings/visual_layout_embeddings.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/graph/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/graph/continuous_time_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/graph/continuous_time_encoder.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/graph/delta_kg_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/graph/delta_kg_encoder.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/graph/gat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/graph/gat.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/graph/gat_adj_matrix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/graph/gat_adj_matrix.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/graph/graph_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/graph/graph_encoder.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/graph/naive_attention_based_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/graph/naive_attention_based_encoder.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/image_feature_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/image_feature_encoder.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/multimodal_bert_clf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/multimodal_bert_clf.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/multimodal_bert_for_pretraining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/multimodal_bert_for_pretraining.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/multimodal_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/multimodal_encoder.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/text_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/text_encoder.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/utils.py -------------------------------------------------------------------------------- /antmmf/modules/encoders/visual_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/encoders/visual_encoder.py -------------------------------------------------------------------------------- /antmmf/modules/functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/functional/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/functional/set_criterion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/functional/set_criterion.py -------------------------------------------------------------------------------- /antmmf/modules/functional/swish.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/functional/swish.py -------------------------------------------------------------------------------- /antmmf/modules/fusions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/fusions.py -------------------------------------------------------------------------------- /antmmf/modules/graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/graph.py -------------------------------------------------------------------------------- /antmmf/modules/interpret/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/interpret/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/interpret/integrated_gradient.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/interpret/integrated_gradient.py -------------------------------------------------------------------------------- /antmmf/modules/interpret/saliency_interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/interpret/saliency_interpreter.py -------------------------------------------------------------------------------- /antmmf/modules/interpret/simple_gradient.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/interpret/simple_gradient.py -------------------------------------------------------------------------------- /antmmf/modules/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/layers/conditional_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/conditional_layer_norm.py -------------------------------------------------------------------------------- /antmmf/modules/layers/consensus_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/consensus_module.py -------------------------------------------------------------------------------- /antmmf/modules/layers/conv_net.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/conv_net.py -------------------------------------------------------------------------------- /antmmf/modules/layers/crf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/crf.py -------------------------------------------------------------------------------- /antmmf/modules/layers/exu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/exu.py -------------------------------------------------------------------------------- /antmmf/modules/layers/feats_joint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/feats_joint.py -------------------------------------------------------------------------------- /antmmf/modules/layers/frozen_batchnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/frozen_batchnorm.py -------------------------------------------------------------------------------- /antmmf/modules/layers/gated_tanh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/gated_tanh.py -------------------------------------------------------------------------------- /antmmf/modules/layers/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/linear.py -------------------------------------------------------------------------------- /antmmf/modules/layers/mb_conv_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/mb_conv_block.py -------------------------------------------------------------------------------- /antmmf/modules/layers/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/mlp.py -------------------------------------------------------------------------------- /antmmf/modules/layers/mlp_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/mlp_attention.py -------------------------------------------------------------------------------- /antmmf/modules/layers/modal_combine_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/modal_combine_layer.py -------------------------------------------------------------------------------- /antmmf/modules/layers/padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/padding.py -------------------------------------------------------------------------------- /antmmf/modules/layers/swish.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/swish.py -------------------------------------------------------------------------------- /antmmf/modules/layers/transform_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/transform_layer.py -------------------------------------------------------------------------------- /antmmf/modules/layers/vae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/layers/vae.py -------------------------------------------------------------------------------- /antmmf/modules/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/losses/asymmetric_loss_optimized.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/asymmetric_loss_optimized.py -------------------------------------------------------------------------------- /antmmf/modules/losses/attention_supervision_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/attention_supervision_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/binary_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/binary_cross_entropy.py -------------------------------------------------------------------------------- /antmmf/modules/losses/binary_cross_entropy_with_label_smoothing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/binary_cross_entropy_with_label_smoothing.py -------------------------------------------------------------------------------- /antmmf/modules/losses/binary_cross_entropy_with_logits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/binary_cross_entropy_with_logits.py -------------------------------------------------------------------------------- /antmmf/modules/losses/caption_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/caption_cross_entropy.py -------------------------------------------------------------------------------- /antmmf/modules/losses/combined_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/combined_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/cos_ams_softmax_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/cos_ams_softmax_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/cos_arc_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/cos_arc_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/cross_entropy_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/cross_entropy_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/eet_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/eet_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/hierarchical_multilabel_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/hierarchical_multilabel_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/hierarchical_softmax_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/hierarchical_softmax_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/info_nce_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/info_nce_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/kg_margin_contrastive_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/kg_margin_contrastive_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/knowledge_distill_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/knowledge_distill_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/label_smoothing_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/label_smoothing_cross_entropy.py -------------------------------------------------------------------------------- /antmmf/modules/losses/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/losses.py -------------------------------------------------------------------------------- /antmmf/modules/losses/m4c_decoding_bce_with_mask_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/m4c_decoding_bce_with_mask_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/mil_margin_contrastive_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/mil_margin_contrastive_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/mil_nce_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/mil_nce_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/mse_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/mse_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/multi_label_category_cross_entropy_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/multi_label_category_cross_entropy_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/multi_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/multi_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/nce_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/nce_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/nll_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/nll_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/ordinal_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/ordinal_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/pairwise_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/pairwise_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/softmax_focal_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/softmax_focal_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/softmax_kl_div_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/softmax_kl_div_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/weighted_softmax_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/weighted_softmax_loss.py -------------------------------------------------------------------------------- /antmmf/modules/losses/wrong_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/losses/wrong_loss.py -------------------------------------------------------------------------------- /antmmf/modules/matcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/matcher.py -------------------------------------------------------------------------------- /antmmf/modules/message_passing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/message_passing/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/message_passing/delta_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/message_passing/delta_conv.py -------------------------------------------------------------------------------- /antmmf/modules/message_passing/message_passing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/message_passing/message_passing.py -------------------------------------------------------------------------------- /antmmf/modules/message_passing/qkv_attention_graph_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/message_passing/qkv_attention_graph_conv.py -------------------------------------------------------------------------------- /antmmf/modules/message_passing/relation_wise_norm_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/message_passing/relation_wise_norm_conv.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/accuracy.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/asm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/asm.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/base_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/base_metric.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/bleu4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/bleu4.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/caption_bleu4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/caption_bleu4.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/evaluators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/evaluators/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/evaluators/coco_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/evaluators/coco_eval.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/f1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/f1.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/global_retrieval_recall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/global_retrieval_recall.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/hier_label_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/hier_label_accuracy.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/hier_multilabel_f1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/hier_multilabel_f1.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/ks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/ks.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/map.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/mce_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/mce_accuracy.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/mean_rank.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/mean_rank.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/mean_reciprocal_rank.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/mean_reciprocal_rank.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/metrics.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/mm_retrieval_recall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/mm_retrieval_recall.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/multi_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/multi_accuracy.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/multi_macro_f1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/multi_macro_f1.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/rank_and_hits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/rank_and_hits.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/recall_at_k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/recall_at_k.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/rmce_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/rmce_accuracy.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/roc_auc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/roc_auc.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/rouge_antmmf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/rouge_antmmf.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/span_f1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/span_f1.py -------------------------------------------------------------------------------- /antmmf/modules/metrics/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/metrics/utils.py -------------------------------------------------------------------------------- /antmmf/modules/module_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/module_registry.py -------------------------------------------------------------------------------- /antmmf/modules/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/transformers/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/transformers/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/transformers/base.py -------------------------------------------------------------------------------- /antmmf/modules/transformers/heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/transformers/heads/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/transformers/heads/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/transformers/heads/base.py -------------------------------------------------------------------------------- /antmmf/modules/transformers/heads/detr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/transformers/heads/detr.py -------------------------------------------------------------------------------- /antmmf/modules/transformers/heads/itm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/transformers/heads/itm.py -------------------------------------------------------------------------------- /antmmf/modules/transformers/heads/mlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/transformers/heads/mlm.py -------------------------------------------------------------------------------- /antmmf/modules/transformers/heads/mrc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/transformers/heads/mrc.py -------------------------------------------------------------------------------- /antmmf/modules/transformers/position_enhance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/transformers/position_enhance.py -------------------------------------------------------------------------------- /antmmf/modules/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/utils.py -------------------------------------------------------------------------------- /antmmf/modules/vision/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/__init__.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/cctt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/cctt.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/clip/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/clip/cn_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/clip/cn_model.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/clip/cn_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/clip/cn_tokenizer.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/clip/configuration_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/clip/configuration_bert.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/clip/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/clip/model.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/clip/modeling_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/clip/modeling_bert.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/clip/simple_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/clip/simple_tokenizer.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/clip/vocab.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/clip/vocab.txt -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/efficientnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/efficientnet.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/pvt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/pvt.py -------------------------------------------------------------------------------- /antmmf/modules/vision/backbone/video_swin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/backbone/video_swin.py -------------------------------------------------------------------------------- /antmmf/modules/vision/necks/BackboneWithFPN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/necks/BackboneWithFPN.py -------------------------------------------------------------------------------- /antmmf/modules/vision/non_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/non_local.py -------------------------------------------------------------------------------- /antmmf/modules/vision/temporal_shift.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/modules/vision/temporal_shift.py -------------------------------------------------------------------------------- /antmmf/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/optimizer/__init__.py -------------------------------------------------------------------------------- /antmmf/optimizer/adan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/optimizer/adan.py -------------------------------------------------------------------------------- /antmmf/optimizer/adv_free_lb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/optimizer/adv_free_lb.py -------------------------------------------------------------------------------- /antmmf/optimizer/basic_optimizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/optimizer/basic_optimizers.py -------------------------------------------------------------------------------- /antmmf/optimizer/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/optimizer/build.py -------------------------------------------------------------------------------- /antmmf/optimizer/combine_optimizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/optimizer/combine_optimizers.py -------------------------------------------------------------------------------- /antmmf/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/predictors/__init__.py -------------------------------------------------------------------------------- /antmmf/predictors/base_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/predictors/base_predictor.py -------------------------------------------------------------------------------- /antmmf/predictors/batch_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/predictors/batch_predictor.py -------------------------------------------------------------------------------- /antmmf/predictors/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/predictors/build.py -------------------------------------------------------------------------------- /antmmf/predictors/mmbt_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/predictors/mmbt_predictor.py -------------------------------------------------------------------------------- /antmmf/predictors/multitask_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/predictors/multitask_predictor.py -------------------------------------------------------------------------------- /antmmf/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/run.py -------------------------------------------------------------------------------- /antmmf/scripts/extract_vocabulary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/scripts/extract_vocabulary.py -------------------------------------------------------------------------------- /antmmf/scripts/features/extract_features.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/scripts/features/extract_features.md -------------------------------------------------------------------------------- /antmmf/scripts/features/extract_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/scripts/features/extract_features.py -------------------------------------------------------------------------------- /antmmf/scripts/features/extract_features_vmb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/scripts/features/extract_features_vmb.py -------------------------------------------------------------------------------- /antmmf/scripts/features/extract_resnet152_feat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/scripts/features/extract_resnet152_feat.py -------------------------------------------------------------------------------- /antmmf/scripts/features/extract_resnet_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/scripts/features/extract_resnet_features.py -------------------------------------------------------------------------------- /antmmf/scripts/features/lmdb_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/scripts/features/lmdb_conversion.py -------------------------------------------------------------------------------- /antmmf/scripts/hm_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/scripts/hm_convert.py -------------------------------------------------------------------------------- /antmmf/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/structures/__init__.py -------------------------------------------------------------------------------- /antmmf/structures/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/structures/base.py -------------------------------------------------------------------------------- /antmmf/structures/boxes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/structures/boxes.py -------------------------------------------------------------------------------- /antmmf/structures/images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/structures/images.py -------------------------------------------------------------------------------- /antmmf/structures/nested_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/structures/nested_tensor.py -------------------------------------------------------------------------------- /antmmf/structures/sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/structures/sample.py -------------------------------------------------------------------------------- /antmmf/structures/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/structures/utils.py -------------------------------------------------------------------------------- /antmmf/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/tasks/__init__.py -------------------------------------------------------------------------------- /antmmf/tasks/base_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/tasks/base_task.py -------------------------------------------------------------------------------- /antmmf/trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/trainers/__init__.py -------------------------------------------------------------------------------- /antmmf/trainers/adv_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/trainers/adv_trainer.py -------------------------------------------------------------------------------- /antmmf/trainers/base_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/trainers/base_trainer.py -------------------------------------------------------------------------------- /antmmf/trainers/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/trainers/build.py -------------------------------------------------------------------------------- /antmmf/trainers/distill_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/trainers/distill_trainer.py -------------------------------------------------------------------------------- /antmmf/trainers/remote_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/trainers/remote_trainer.py -------------------------------------------------------------------------------- /antmmf/trainers/retrieval_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/trainers/retrieval_trainer.py -------------------------------------------------------------------------------- /antmmf/utils/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/.DS_Store -------------------------------------------------------------------------------- /antmmf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/__init__.py -------------------------------------------------------------------------------- /antmmf/utils/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/dataset_utils.py -------------------------------------------------------------------------------- /antmmf/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/distributed.py -------------------------------------------------------------------------------- /antmmf/utils/distributed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/distributed_utils.py -------------------------------------------------------------------------------- /antmmf/utils/download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/download.py -------------------------------------------------------------------------------- /antmmf/utils/early_stopping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/early_stopping.py -------------------------------------------------------------------------------- /antmmf/utils/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/env.py -------------------------------------------------------------------------------- /antmmf/utils/file_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/file_io.py -------------------------------------------------------------------------------- /antmmf/utils/flags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/flags.py -------------------------------------------------------------------------------- /antmmf/utils/general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/general.py -------------------------------------------------------------------------------- /antmmf/utils/glob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/glob.py -------------------------------------------------------------------------------- /antmmf/utils/image_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/image_ops.py -------------------------------------------------------------------------------- /antmmf/utils/init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/init.py -------------------------------------------------------------------------------- /antmmf/utils/inspector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/inspector.py -------------------------------------------------------------------------------- /antmmf/utils/launch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/launch.py -------------------------------------------------------------------------------- /antmmf/utils/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/logger.py -------------------------------------------------------------------------------- /antmmf/utils/optim_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/optim_utils.py -------------------------------------------------------------------------------- /antmmf/utils/phoc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/phoc/__init__.py -------------------------------------------------------------------------------- /antmmf/utils/phoc/build_phoc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/phoc/build_phoc.py -------------------------------------------------------------------------------- /antmmf/utils/phoc/src/cphoc.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/phoc/src/cphoc.c -------------------------------------------------------------------------------- /antmmf/utils/phoc/src/src_note.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/phoc/src/src_note.md -------------------------------------------------------------------------------- /antmmf/utils/register_fp32.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/register_fp32.py -------------------------------------------------------------------------------- /antmmf/utils/scatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/scatter.py -------------------------------------------------------------------------------- /antmmf/utils/tensor_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/tensor_utils.py -------------------------------------------------------------------------------- /antmmf/utils/text_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/text_utils.py -------------------------------------------------------------------------------- /antmmf/utils/timer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/timer.py -------------------------------------------------------------------------------- /antmmf/utils/video_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/video_utils.py -------------------------------------------------------------------------------- /antmmf/utils/visual_utils/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/visual_utils/.DS_Store -------------------------------------------------------------------------------- /antmmf/utils/visual_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/visual_utils/__init__.py -------------------------------------------------------------------------------- /antmmf/utils/visual_utils/palette.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/visual_utils/palette.py -------------------------------------------------------------------------------- /antmmf/utils/visual_utils/vis_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/visual_utils/vis_utils.py -------------------------------------------------------------------------------- /antmmf/utils/visual_utils/visualization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/visual_utils/visualization_utils.py -------------------------------------------------------------------------------- /antmmf/utils/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/visualize.py -------------------------------------------------------------------------------- /antmmf/utils/vocab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/antmmf/utils/vocab.py -------------------------------------------------------------------------------- /prj/EVE/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/EVE/README.md -------------------------------------------------------------------------------- /prj/M2_Encoder/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/README.md -------------------------------------------------------------------------------- /prj/M2_Encoder/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Copyright (c) 2023 Ant Group and its affiliates. 3 | # @Author: mujian 4 | -------------------------------------------------------------------------------- /prj/M2_Encoder/configs/Encoder_0.4B.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/configs/Encoder_0.4B.json -------------------------------------------------------------------------------- /prj/M2_Encoder/configs/Encoder_10B.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/configs/Encoder_10B.json -------------------------------------------------------------------------------- /prj/M2_Encoder/configs/Encoder_1B.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/configs/Encoder_1B.json -------------------------------------------------------------------------------- /prj/M2_Encoder/data/coco-cn_test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/data/coco-cn_test.jsonl -------------------------------------------------------------------------------- /prj/M2_Encoder/data/coco_caption_karpathy_test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/data/coco_caption_karpathy_test.jsonl -------------------------------------------------------------------------------- /prj/M2_Encoder/data/f30k-cn_test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/data/f30k-cn_test.jsonl -------------------------------------------------------------------------------- /prj/M2_Encoder/data/f30k_caption_karpathy_test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/data/f30k_caption_karpathy_test.jsonl -------------------------------------------------------------------------------- /prj/M2_Encoder/eval_retrieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/eval_retrieval.py -------------------------------------------------------------------------------- /prj/M2_Encoder/m2_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/m2_encoder.py -------------------------------------------------------------------------------- /prj/M2_Encoder/ms_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/ms_wrapper.py -------------------------------------------------------------------------------- /prj/M2_Encoder/pics/cn_imagenet_cls.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/pics/cn_imagenet_cls.jpg -------------------------------------------------------------------------------- /prj/M2_Encoder/pics/cn_retrieval.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/pics/cn_retrieval.jpg -------------------------------------------------------------------------------- /prj/M2_Encoder/pics/effect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/pics/effect.png -------------------------------------------------------------------------------- /prj/M2_Encoder/pics/en_imagenet_cls.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/pics/en_imagenet_cls.jpg -------------------------------------------------------------------------------- /prj/M2_Encoder/pics/en_retrieval.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/pics/en_retrieval.jpg -------------------------------------------------------------------------------- /prj/M2_Encoder/pics/fine-grained.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/pics/fine-grained.jpg -------------------------------------------------------------------------------- /prj/M2_Encoder/pics/pokemon.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/pics/pokemon.jpeg -------------------------------------------------------------------------------- /prj/M2_Encoder/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/requirements.txt -------------------------------------------------------------------------------- /prj/M2_Encoder/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/run.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/config.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .vlmo_module import VLMo 2 | -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/modules/heads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/modules/heads.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/modules/modeling_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/modules/modeling_utils.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/modules/multiway_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/modules/multiway_transformer.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/modules/objectives.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/modules/objectives.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/modules/vlmo_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/modules/vlmo_module.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/modules/vlmo_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/modules/vlmo_utils.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/tokenizer/__init__.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/tokenizer/sp.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/tokenizer/sp.model -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/tokenizer/tokenization_glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/tokenizer/tokenization_glm.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/__init__.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/architecture/__init__.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/architecture/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/architecture/config.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/architecture/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/architecture/decoder.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/architecture/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/architecture/encoder.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/architecture/encoder_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/architecture/encoder_decoder.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/architecture/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/architecture/utils.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/__init__.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/droppath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/droppath.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/embedding.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/feedforward_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/feedforward_network.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/multihead_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/multihead_attention.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/multiway_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/multiway_network.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/relative_position_bias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/relative_position_bias.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/xmoe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/xmoe/__init__.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/xmoe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/xmoe/moe_layer.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/xmoe/routing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/xmoe/routing.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/component/xpos_relative_position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/component/xpos_relative_position.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/model/BEiT3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/model/BEiT3.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/torchscale/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/torchscale/model/__init__.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/transforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/transforms/__init__.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/transforms/pixelbert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/transforms/pixelbert.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/transforms/randaug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/transforms/randaug.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/transforms/randaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/transforms/randaugment.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/transforms/square_transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/transforms/square_transform.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/transforms/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/transforms/utils.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/utils/beit_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/utils/beit_utils.py -------------------------------------------------------------------------------- /prj/M2_Encoder/vlmo/utils/patch_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_Encoder/vlmo/utils/patch_utils.py -------------------------------------------------------------------------------- /prj/M2_RAAP/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_RAAP/README.md -------------------------------------------------------------------------------- /prj/M2_omni/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/README.md -------------------------------------------------------------------------------- /prj/M2_omni/data/audioqa.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/data/audioqa.wav -------------------------------------------------------------------------------- /prj/M2_omni/data/m2-omni.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/data/m2-omni.png -------------------------------------------------------------------------------- /prj/M2_omni/data/plant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/data/plant.png -------------------------------------------------------------------------------- /prj/M2_omni/data/video1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/data/video1.mp4 -------------------------------------------------------------------------------- /prj/M2_omni/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/__init__.py -------------------------------------------------------------------------------- /prj/M2_omni/models/configuration_llama_3d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/configuration_llama_3d.py -------------------------------------------------------------------------------- /prj/M2_omni/models/configuration_m2omni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/configuration_m2omni.py -------------------------------------------------------------------------------- /prj/M2_omni/models/configuration_qwen2_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/configuration_qwen2_vit.py -------------------------------------------------------------------------------- /prj/M2_omni/models/feature_extraction_sanm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/feature_extraction_sanm.py -------------------------------------------------------------------------------- /prj/M2_omni/models/image_processing_m2omni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/image_processing_m2omni.py -------------------------------------------------------------------------------- /prj/M2_omni/models/m2omni_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/m2omni_utils.py -------------------------------------------------------------------------------- /prj/M2_omni/models/modeling_llama_3d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/modeling_llama_3d.py -------------------------------------------------------------------------------- /prj/M2_omni/models/modeling_m2omni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/modeling_m2omni.py -------------------------------------------------------------------------------- /prj/M2_omni/models/processing_m2omni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/processing_m2omni.py -------------------------------------------------------------------------------- /prj/M2_omni/models/qwen2_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/qwen2_vit.py -------------------------------------------------------------------------------- /prj/M2_omni/models/sanm_audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/models/sanm_audio.py -------------------------------------------------------------------------------- /prj/M2_omni/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/M2_omni/requirements.txt -------------------------------------------------------------------------------- /prj/Pink/47.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/47.png -------------------------------------------------------------------------------- /prj/Pink/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/README.md -------------------------------------------------------------------------------- /prj/Pink/dataset_generation/coco_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/dataset_generation/coco_detection.py -------------------------------------------------------------------------------- /prj/Pink/dataset_generation/object365_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/dataset_generation/object365_detection.py -------------------------------------------------------------------------------- /prj/Pink/dataset_generation/pointing_vqa_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/dataset_generation/pointing_vqa_local.py -------------------------------------------------------------------------------- /prj/Pink/dataset_generation/pointing_vqa_look_twice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/dataset_generation/pointing_vqa_look_twice.py -------------------------------------------------------------------------------- /prj/Pink/dataset_generation/v7w_pointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/dataset_generation/v7w_pointing.py -------------------------------------------------------------------------------- /prj/Pink/dataset_generation/v7w_telling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/dataset_generation/v7w_telling.py -------------------------------------------------------------------------------- /prj/Pink/dataset_generation/visual_genome2vg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/dataset_generation/visual_genome2vg.py -------------------------------------------------------------------------------- /prj/Pink/demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/demo.py -------------------------------------------------------------------------------- /prj/Pink/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/image.png -------------------------------------------------------------------------------- /prj/Pink/inference.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/inference.ipynb -------------------------------------------------------------------------------- /prj/Pink/nash_high.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/nash_high.jpeg -------------------------------------------------------------------------------- /prj/Pink/pink/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/__init__.py -------------------------------------------------------------------------------- /prj/Pink/pink/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/constants.py -------------------------------------------------------------------------------- /prj/Pink/pink/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/conversation.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/AOKVQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/AOKVQA.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/BaseDataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/BaseDataset.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/COCOCaption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/COCOCaption.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/FlickrCaption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/FlickrCaption.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/FlickrEntity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/FlickrEntity.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/GQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/GQA.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/LLaVA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/LLaVA.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/LLaVACaption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/LLaVACaption.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/OKVQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/OKVQA.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/Object365.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/Object365.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/PointingVQALocal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/PointingVQALocal.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/PointingVQALookTwice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/PointingVQALookTwice.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/PretrainCaption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/PretrainCaption.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/ProbMergeDataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/ProbMergeDataset.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/Templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/Templates.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/V7WGrounding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/V7WGrounding.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/VQAv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/VQAv2.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/VSR.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/VSR.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/VisualGenome.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/VisualGenome.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/VisualGrounding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/VisualGrounding.py -------------------------------------------------------------------------------- /prj/Pink/pink/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/datasets/__init__.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/eval_gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/eval_gqa.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/eval_pointingvqa_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/eval_pointingvqa_local.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/eval_pointingvqa_looktwice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/eval_pointingvqa_looktwice.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/eval_v7wgrounding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/eval_v7wgrounding.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/eval_vg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/eval_vg.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/eval_vqav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/eval_vqav2.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_gqa.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_iconqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_iconqa.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_object365.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_object365.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_okvqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_okvqa.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_pointingvqa_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_pointingvqa_local.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_pointingvqa_looktwice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_pointingvqa_looktwice.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_seed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_seed.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_v7wgrounding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_v7wgrounding.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_vg_base_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_vg_base_batch.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_vqav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_vqav2.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/model_vsr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/model_vsr.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/object365_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/object365_filter.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/vqa_tools/vqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/vqa_tools/vqa.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/vqa_tools/vqa_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/vqa_tools/vqa_eval.py -------------------------------------------------------------------------------- /prj/Pink/pink/eval/vqa_tools/vqa_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/eval/vqa_tools/vqa_result.py -------------------------------------------------------------------------------- /prj/Pink/pink/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/model/__init__.py -------------------------------------------------------------------------------- /prj/Pink/pink/model/adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/model/adapter.py -------------------------------------------------------------------------------- /prj/Pink/pink/model/eva_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/model/eva_vit.py -------------------------------------------------------------------------------- /prj/Pink/pink/model/pink.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/model/pink.py -------------------------------------------------------------------------------- /prj/Pink/pink/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/model/utils.py -------------------------------------------------------------------------------- /prj/Pink/pink/train/pink_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/train/pink_trainer.py -------------------------------------------------------------------------------- /prj/Pink/pink/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pink/train/train.py -------------------------------------------------------------------------------- /prj/Pink/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/pyproject.toml -------------------------------------------------------------------------------- /prj/Pink/scripts/eval_refcoco.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/scripts/eval_refcoco.sh -------------------------------------------------------------------------------- /prj/Pink/scripts/object365_generate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/scripts/object365_generate.sh -------------------------------------------------------------------------------- /prj/Pink/scripts/stage1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/scripts/stage1.sh -------------------------------------------------------------------------------- /prj/Pink/scripts/stage2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/scripts/stage2.sh -------------------------------------------------------------------------------- /prj/Pink/scripts/stage2_with_object365.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/Pink/scripts/stage2_with_object365.sh -------------------------------------------------------------------------------- /prj/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/README.md -------------------------------------------------------------------------------- /prj/base_vtp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/README.md -------------------------------------------------------------------------------- /prj/base_vtp/README_base_vtp_en.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/README_base_vtp_en.md -------------------------------------------------------------------------------- /prj/base_vtp/configs/roi_modelling/roi_model_pretrain.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/roi_modelling/roi_model_pretrain.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/image/pretrain/univl_pretrain.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/image/pretrain/univl_pretrain.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_multi_choice_qa/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_multi_choice_qa/base.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_retrieval/CN_vatex_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_retrieval/CN_vatex_pvt.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_retrieval/CN_vatex_pvt_local.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_retrieval/CN_vatex_pvt_local.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_retrieval/CN_vatex_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_retrieval/CN_vatex_videoswin.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_retrieval/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_retrieval/base.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_retrieval/didemo_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_retrieval/didemo_pvt.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_retrieval/msr_vtt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_retrieval/msr_vtt.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.local.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.local.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/finetune_retrieval/msr_vtt_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/finetune_retrieval/msr_vtt_videoswin.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/CN_video_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/CN_video_videoswin.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/base.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/chinese.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/chinese.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/coco_vg.local.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/coco_vg.local.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/coco_vg.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/coco_vg.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/coco_vg_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/coco_vg_pvt.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/coco_vg_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/coco_vg_videoswin.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/howto100m_coco_vg_rnd_asr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/howto100m_coco_vg_rnd_asr.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/video_swin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/video_swin.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/pretrain/webvid_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/pretrain/webvid_videoswin.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/visual_encoder/pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/visual_encoder/pvt.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/visual_encoder/resnet.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/visual_encoder/resnet.yml -------------------------------------------------------------------------------- /prj/base_vtp/configs/univl/video/visual_encoder/video_swin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/configs/univl/video/visual_encoder/video_swin.yml -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/__init__.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/roi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/roi/__init__.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/roi/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/roi/builder.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/roi/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/roi/dataset.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/roi/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/roi/model.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/roi/region_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/roi/region_processor.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/roi/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/roi/task.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/__init__.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/__init__.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/clip_text_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/clip_text_encoder.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/clip_visual_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/clip_visual_encoder.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/moco_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/moco_utils.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/univl_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/univl_base.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/univl_classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/univl_classification.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/univl_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/univl_model.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/univl_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/univl_pretrain.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/univl_video_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/univl_video_base.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/univl_video_cls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/univl_video_cls.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/univl_video_multi_choice_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/univl_video_multi_choice_qa.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/univl_video_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/univl_video_pretrain.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/model/univl_video_ret.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/model/univl_video_ret.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/pretrain_img_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/pretrain_img_text/__init__.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/pretrain_img_text/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/pretrain_img_text/builder.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/pretrain_img_text/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/pretrain_img_text/dataset.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/pretrain_video_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/pretrain_video_text/__init__.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/pretrain_video_text/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/pretrain_video_text/builder.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/pretrain_video_text/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/pretrain_video_text/dataset.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/processors.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/task.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/video_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/video_text/__init__.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/video_text/cls_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/video_text/cls_builder.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/video_text/cls_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/video_text/cls_dataset.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/video_text/mc_qa_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/video_text/mc_qa_builder.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/video_text/mc_qa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/video_text/mc_qa_dataset.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/video_text/ret_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/video_text/ret_builder.py -------------------------------------------------------------------------------- /prj/base_vtp/roi_univl/univl/video_text/ret_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/roi_univl/univl/video_text/ret_dataset.py -------------------------------------------------------------------------------- /prj/base_vtp/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/run.py -------------------------------------------------------------------------------- /prj/base_vtp/scripts/finetune/mcvqa_msr_vtt_mc_qa_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/scripts/finetune/mcvqa_msr_vtt_mc_qa_videoswin.sh -------------------------------------------------------------------------------- /prj/base_vtp/scripts/finetune/ret_msr_vtt_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/scripts/finetune/ret_msr_vtt_videoswin.sh -------------------------------------------------------------------------------- /prj/base_vtp/scripts/finetune/vqa_msrvtt_qa_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/scripts/finetune/vqa_msrvtt_qa_videoswin.sh -------------------------------------------------------------------------------- /prj/base_vtp/scripts/local_test/coco_vg.local.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/scripts/local_test/coco_vg.local.sh -------------------------------------------------------------------------------- /prj/base_vtp/scripts/local_test/msr_vtt_pvt.local.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/scripts/local_test/msr_vtt_pvt.local.sh -------------------------------------------------------------------------------- /prj/base_vtp/scripts/pretrain/coco_vg_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/scripts/pretrain/coco_vg_videoswin.sh -------------------------------------------------------------------------------- /prj/base_vtp/scripts/pretrain/webvid_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/base_vtp/scripts/pretrain/webvid_videoswin.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/CODEBASE_CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/CODEBASE_CN.md -------------------------------------------------------------------------------- /prj/cnvid_vtp/CODEBASE_EN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/CODEBASE_EN.md -------------------------------------------------------------------------------- /prj/cnvid_vtp/DATASET.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/DATASET.md -------------------------------------------------------------------------------- /prj/cnvid_vtp/LEGAL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/LEGAL.md -------------------------------------------------------------------------------- /prj/cnvid_vtp/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/LICENSE -------------------------------------------------------------------------------- /prj/cnvid_vtp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/README.md -------------------------------------------------------------------------------- /prj/cnvid_vtp/TERMS.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/TERMS.md -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/roi_modelling/roi_model_pretrain.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/roi_modelling/roi_model_pretrain.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/image/pretrain/univl_pretrain.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/image/pretrain/univl_pretrain.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/finetune_retrieval/CN_vatex_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/finetune_retrieval/CN_vatex_videoswin.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/finetune_retrieval/EN_vatex_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/finetune_retrieval/EN_vatex_videoswin.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/finetune_retrieval/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/finetune_retrieval/base.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/pretrain/CN_video_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/pretrain/CN_video_videoswin.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/pretrain/EN_coco_vg_cc_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/pretrain/EN_coco_vg_cc_videoswin.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/pretrain/EN_webvid_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/pretrain/EN_webvid_videoswin.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/pretrain/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/pretrain/base.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/pretrain/chinese.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/pretrain/chinese.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/pretrain/howto100m_coco_vg_rnd_asr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/pretrain/howto100m_coco_vg_rnd_asr.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/pretrain/quick_test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/pretrain/quick_test.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/pretrain/video_swin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/pretrain/video_swin.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/configs/univl/video/visual_encoder/video_swin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/configs/univl/video/visual_encoder/video_swin.yml -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/adj_count_t50.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/adj_count_t50.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/experiment_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/experiment_result.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/keyword_cloud_t200.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/keyword_cloud_t200.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/motivation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/motivation.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/noun_count_t50.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/noun_count_t50.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/teaser_figure.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/teaser_figure.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/time_count.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/time_count.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/topic_cloud_t200.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/topic_cloud_t200.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/verb_count_t50.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/verb_count_t50.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/demo_figs/video_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/demo_figs/video_example.jpg -------------------------------------------------------------------------------- /prj/cnvid_vtp/download_cnvid/download_cnvid_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/download_cnvid/download_cnvid_video.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/requirements.txt -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/__init__.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/roi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/roi/__init__.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/roi/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/roi/builder.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/roi/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/roi/dataset.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/roi/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/roi/model.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/roi/region_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/roi/region_processor.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/roi/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/roi/task.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/__init__.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/model/__init__.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/model/moco_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/model/moco_utils.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/model/univl_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/model/univl_base.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/model/univl_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/model/univl_model.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/model/univl_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/model/univl_pretrain.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/model/univl_video_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/model/univl_video_base.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/model/univl_video_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/model/univl_video_pretrain.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/model/univl_video_ret.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/model/univl_video_ret.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/pretrain_video_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/pretrain_video_text/__init__.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/pretrain_video_text/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/pretrain_video_text/builder.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/pretrain_video_text/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/pretrain_video_text/dataset.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/processors.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/task.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/video_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/video_text/__init__.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/video_text/ret_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/video_text/ret_builder.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/roi_univl/univl/video_text/ret_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/roi_univl/univl/video_text/ret_dataset.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/run.py -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/finetune/CN_ret_didemo_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/finetune/CN_ret_didemo_videoswin.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/finetune/CN_ret_msr_vtt_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/finetune/CN_ret_msr_vtt_videoswin.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/finetune/CN_ret_vatex_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/finetune/CN_ret_vatex_videoswin.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/finetune/EN_ret_didemo_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/finetune/EN_ret_didemo_videoswin.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/finetune/EN_ret_msr_vtt_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/finetune/EN_ret_msr_vtt_videoswin.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/finetune/EN_ret_vatex_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/finetune/EN_ret_vatex_videoswin.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/local_test/coco_vg.local.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/local_test/coco_vg.local.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/pretrain/CN_cnvid_pt_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/pretrain/CN_cnvid_pt_videoswin.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/pretrain/EN_coco_vg_cc_pt_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/pretrain/EN_coco_vg_cc_pt_videoswin.sh -------------------------------------------------------------------------------- /prj/cnvid_vtp/scripts/pretrain/EN_webvid_pt_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/cnvid_vtp/scripts/pretrain/EN_webvid_pt_videoswin.sh -------------------------------------------------------------------------------- /prj/dmae_vtp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/README.md -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/finetune_retrieval/CN_vatex_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/finetune_retrieval/CN_vatex_pvt.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/finetune_retrieval/CN_vatex_pvt_local.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/finetune_retrieval/CN_vatex_pvt_local.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/finetune_retrieval/CN_vatex_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/finetune_retrieval/CN_vatex_videoswin.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/finetune_retrieval/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/finetune_retrieval/base.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/finetune_retrieval/didemo_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/finetune_retrieval/didemo_pvt.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/finetune_retrieval/msr_vtt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/finetune_retrieval/msr_vtt.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.local.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.local.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/finetune_retrieval/msr_vtt_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/finetune_retrieval/msr_vtt_videoswin.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/visual_encoder/pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/visual_encoder/pvt.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/visual_encoder/resnet.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/visual_encoder/resnet.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/configs/univl/video/visual_encoder/video_swin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/configs/univl/video/visual_encoder/video_swin.yml -------------------------------------------------------------------------------- /prj/dmae_vtp/demo_figs/simple_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/demo_figs/simple_framework.png -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/__init__.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/roi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/roi/__init__.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/roi/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/roi/builder.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/roi/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/roi/dataset.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/roi/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/roi/model.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/roi/region_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/roi/region_processor.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/roi/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/roi/task.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/__init__.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/__init__.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/clip_text_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/clip_text_encoder.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/clip_visual_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/clip_visual_encoder.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/dmae_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/dmae_utils.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/moco_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/moco_utils.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/tpmcl_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/tpmcl_utils.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/univl_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/univl_base.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/univl_classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/univl_classification.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/univl_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/univl_model.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/univl_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/univl_pretrain.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/univl_video_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/univl_video_base.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/univl_video_cls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/univl_video_cls.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/univl_video_multi_choice_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/univl_video_multi_choice_qa.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/univl_video_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/univl_video_pretrain.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/model/univl_video_ret.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/model/univl_video_ret.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/pretrain_img_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/pretrain_img_text/__init__.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/pretrain_img_text/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/pretrain_img_text/builder.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/pretrain_img_text/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/pretrain_img_text/dataset.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/pretrain_video_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/pretrain_video_text/__init__.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/pretrain_video_text/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/pretrain_video_text/builder.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/pretrain_video_text/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/pretrain_video_text/dataset.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/processors.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/task.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/video_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/video_text/__init__.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/video_text/cls_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/video_text/cls_builder.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/video_text/cls_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/video_text/cls_dataset.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/video_text/mc_qa_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/video_text/mc_qa_builder.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/video_text/mc_qa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/video_text/mc_qa_dataset.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/video_text/ret_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/video_text/ret_builder.py -------------------------------------------------------------------------------- /prj/dmae_vtp/roi_univl/univl/video_text/ret_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/roi_univl/univl/video_text/ret_dataset.py -------------------------------------------------------------------------------- /prj/dmae_vtp/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/run.py -------------------------------------------------------------------------------- /prj/dmae_vtp/scripts/local_test/msr_vtt_pvt.eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/scripts/local_test/msr_vtt_pvt.eval.sh -------------------------------------------------------------------------------- /prj/dmae_vtp/scripts/local_test/msr_vtt_pvt.local.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/scripts/local_test/msr_vtt_pvt.local.sh -------------------------------------------------------------------------------- /prj/dmae_vtp/scripts/local_test/msr_vtt_pvt.train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/dmae_vtp/scripts/local_test/msr_vtt_pvt.train.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/CODEBASE_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/CODEBASE_cn.md -------------------------------------------------------------------------------- /prj/snps3_vtp/CODEBASE_en.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/CODEBASE_en.md -------------------------------------------------------------------------------- /prj/snps3_vtp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/README.md -------------------------------------------------------------------------------- /prj/snps3_vtp/auxiliary_files/count_reprocess_webvid.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/auxiliary_files/count_reprocess_webvid.json -------------------------------------------------------------------------------- /prj/snps3_vtp/auxiliary_files/generate_ss_word_json/1st_ss_word_mining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/auxiliary_files/generate_ss_word_json/1st_ss_word_mining.py -------------------------------------------------------------------------------- /prj/snps3_vtp/auxiliary_files/vocab_simi_list_new.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/auxiliary_files/vocab_simi_list_new.json -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/roi_modelling/roi_model_pretrain.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/roi_modelling/roi_model_pretrain.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/image/pretrain/univl_pretrain.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/image/pretrain/univl_pretrain.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/finetune_classification/msvd_qa_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/finetune_classification/msvd_qa_pvt.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/finetune_multi_choice_qa/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/finetune_multi_choice_qa/base.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/finetune_retrieval/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/finetune_retrieval/base.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/finetune_retrieval/didemo_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/finetune_retrieval/didemo_pvt.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/finetune_retrieval/didemo_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/finetune_retrieval/didemo_videoswin.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/finetune_retrieval/msr_vtt_pvt.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/finetune_retrieval/msr_vtt_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/finetune_retrieval/msr_vtt_videoswin.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/finetune_retrieval/msvd_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/finetune_retrieval/msvd_pvt.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/finetune_retrieval/msvd_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/finetune_retrieval/msvd_videoswin.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/pretrain/base.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/pretrain/base.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/pretrain/cc_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/pretrain/cc_videoswin.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/pretrain/coco_vg_pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/pretrain/coco_vg_pvt.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/pretrain/coco_vg_resnet.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/pretrain/coco_vg_resnet.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/pretrain/howto100m_coco_vg_rnd_asr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/pretrain/howto100m_coco_vg_rnd_asr.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/pretrain/quick_test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/pretrain/quick_test.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/pretrain/video_swin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/pretrain/video_swin.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/pretrain/webvid_videoswin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/pretrain/webvid_videoswin.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/visual_encoder/pvt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/visual_encoder/pvt.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/visual_encoder/resnet.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/visual_encoder/resnet.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/configs/univl/video/visual_encoder/video_swin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/configs/univl/video/visual_encoder/video_swin.yml -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/__init__.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/roi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/roi/__init__.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/roi/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/roi/builder.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/roi/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/roi/dataset.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/roi/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/roi/model.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/roi/region_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/roi/region_processor.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/roi/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/roi/task.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/__init__.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/__init__.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/clip_text_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/clip_text_encoder.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/clip_visual_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/clip_visual_encoder.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/moco_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/moco_utils.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/univl_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/univl_base.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/univl_classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/univl_classification.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/univl_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/univl_model.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/univl_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/univl_pretrain.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/univl_video_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/univl_video_base.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/univl_video_cls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/univl_video_cls.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/univl_video_multi_choice_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/univl_video_multi_choice_qa.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/univl_video_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/univl_video_pretrain.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/model/univl_video_ret.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/model/univl_video_ret.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/pretrain_img_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/pretrain_img_text/__init__.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/pretrain_img_text/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/pretrain_img_text/builder.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/pretrain_img_text/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/pretrain_img_text/dataset.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/pretrain_video_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/pretrain_video_text/__init__.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/pretrain_video_text/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/pretrain_video_text/builder.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/pretrain_video_text/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/pretrain_video_text/dataset.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/processors.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/task.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/video_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/video_text/__init__.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/video_text/cls_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/video_text/cls_builder.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/video_text/cls_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/video_text/cls_dataset.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/video_text/mc_qa_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/video_text/mc_qa_builder.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/video_text/mc_qa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/video_text/mc_qa_dataset.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/video_text/ret_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/video_text/ret_builder.py -------------------------------------------------------------------------------- /prj/snps3_vtp/roi_univl/univl/video_text/ret_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/roi_univl/univl/video_text/ret_dataset.py -------------------------------------------------------------------------------- /prj/snps3_vtp/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/run.py -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/finetune/multi_choice_qa/msr_vtt_mc_qa_pvt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/finetune/multi_choice_qa/msr_vtt_mc_qa_pvt.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/finetune/text2video_retrieval/didemo_ret_pvt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/finetune/text2video_retrieval/didemo_ret_pvt.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/finetune/text2video_retrieval/msr_vtt_ret_pvt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/finetune/text2video_retrieval/msr_vtt_ret_pvt.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/finetune/text2video_retrieval/msvd_ret_pvt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/finetune/text2video_retrieval/msvd_ret_pvt.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/finetune/video_qa/msrvtt_qa_pvt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/finetune/video_qa/msrvtt_qa_pvt.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/finetune/video_qa/msrvtt_qa_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/finetune/video_qa/msrvtt_qa_videoswin.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/finetune/video_qa/msvd_qa_pvt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/finetune/video_qa/msvd_qa_pvt.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/finetune/video_qa/msvd_qa_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/finetune/video_qa/msvd_qa_videoswin.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/local_test/snps3_quick_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/local_test/snps3_quick_test.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/pretrain/cc_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/pretrain/cc_videoswin.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/pretrain/coco_vg_pvt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/pretrain/coco_vg_pvt.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/pretrain/coco_vg_resnet.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/pretrain/coco_vg_resnet.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/scripts/pretrain/webvid_videoswin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/scripts/pretrain/webvid_videoswin.sh -------------------------------------------------------------------------------- /prj/snps3_vtp/z_figs/ablation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/z_figs/ablation.jpg -------------------------------------------------------------------------------- /prj/snps3_vtp/z_figs/performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/prj/snps3_vtp/z_figs/performance.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/requirements.txt -------------------------------------------------------------------------------- /tests/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/.DS_Store -------------------------------------------------------------------------------- /tests/data/image/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/image/.DS_Store -------------------------------------------------------------------------------- /tests/data/image/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/image/dog.jpg -------------------------------------------------------------------------------- /tests/data/video/VATEX_CN.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/VATEX_CN.jsonl -------------------------------------------------------------------------------- /tests/data/video/data/asr_files/video9770: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/data/asr_files/video9770 -------------------------------------------------------------------------------- /tests/data/video/data/asr_files/video9771: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/data/asr_files/video9771 -------------------------------------------------------------------------------- /tests/data/video/data/mp4/video9770.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/data/mp4/video9770.mp4 -------------------------------------------------------------------------------- /tests/data/video/data/mp4/video9771.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/data/mp4/video9771.mp4 -------------------------------------------------------------------------------- /tests/data/video/msrvtt_multi_choice_qa.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/msrvtt_multi_choice_qa.jsonl -------------------------------------------------------------------------------- /tests/data/video/msrvtt_test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/msrvtt_test.jsonl -------------------------------------------------------------------------------- /tests/data/video/msrvtt_train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/msrvtt_train.jsonl -------------------------------------------------------------------------------- /tests/data/video/ucf101_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/ucf101_sample.jsonl -------------------------------------------------------------------------------- /tests/data/video/univl_img.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/univl_img.jsonl -------------------------------------------------------------------------------- /tests/data/video/univl_video.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/video/univl_video.jsonl -------------------------------------------------------------------------------- /tests/data/vocab.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/vocab.txt -------------------------------------------------------------------------------- /tests/data/vocabs/bert-base-chinese_21128_vocab.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/vocabs/bert-base-chinese_21128_vocab.txt -------------------------------------------------------------------------------- /tests/data/vocabs/bert-base-uncased_30522_vocab.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/Ant-Multi-Modal-Framework/HEAD/tests/data/vocabs/bert-base-uncased_30522_vocab.txt --------------------------------------------------------------------------------