├── Annotation_Pipeline
│   ├── Phase I
│   │   ├── Modality_Expansion.py
│   │   └── minigpt4
│   │       ├── __init__.py
│   │       ├── common
│   │       │   ├── __init__.py
│   │       │   ├── config.py
│   │       │   ├── dist_utils.py
│   │       │   ├── gradcam.py
│   │       │   ├── logger.py
│   │       │   ├── optims.py
│   │       │   ├── registry.py
│   │       │   └── utils.py
│   │       ├── configs
│   │       │   ├── datasets
│   │       │   │   ├── cc_sbu
│   │       │   │   │   ├── align.yaml
│   │       │   │   │   └── defaults.yaml
│   │       │   │   └── laion
│   │       │   │       └── defaults.yaml
│   │       │   ├── default.yaml
│   │       │   └── models
│   │       │       └── minigpt4.yaml
│   │       ├── conversation
│   │       │   ├── __init__.py
│   │       │   └── conversation.py
│   │       ├── datasets
│   │       │   ├── __init__.py
│   │       │   ├── builders
│   │       │   │   ├── __init__.py
│   │       │   │   ├── base_dataset_builder.py
│   │       │   │   └── image_text_pair_builder.py
│   │       │   ├── data_utils.py
│   │       │   └── datasets
│   │       │       ├── __init__.py
│   │       │       ├── base_dataset.py
│   │       │       ├── caption_datasets.py
│   │       │       ├── cc_sbu_dataset.py
│   │       │       ├── dataloader_utils.py
│   │       │       └── laion_dataset.py
│   │       ├── models
│   │       │   ├── Qformer.py
│   │       │   ├── __init__.py
│   │       │   ├── base_model.py
│   │       │   ├── blip2.py
│   │       │   ├── blip2_outputs.py
│   │       │   ├── eva_vit.py
│   │       │   ├── mini_gpt4.py
│   │       │   └── modeling_llama.py
│   │       ├── processors
│   │       │   ├── __init__.py
│   │       │   ├── base_processor.py
│   │       │   ├── blip_processors.py
│   │       │   └── randaugment.py
│   │       ├── runners
│   │       │   ├── __init__.py
│   │       │   └── runner_base.py
│   │       └── tasks
│   │           ├── __init__.py
│   │           ├── base_task.py
│   │           └── image_text_pretrain.py
│   └── Phase II
│       ├── GroundingDINO
│       │   ├── groundingdino
│       │   │   ├── _C.cpython-37m-x86_64-linux-gnu.so
│       │   │   ├── _C.cpython-38-x86_64-linux-gnu.so
│       │   │   ├── __init__.py
│       │   │   ├── config
│       │   │   │   ├── GroundingDINO_SwinB.py
│       │   │   │   └── GroundingDINO_SwinT_OGC.py
│       │   │   ├── datasets
│       │   │   │   ├── __init__.py
│       │   │   │   └── transforms.py
│       │   │   ├── models
│       │   │   │   ├── GroundingDINO
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── backbone
│       │   │   │   │   │   ├── __init__.py
│       │   │   │   │   │   ├── backbone.py
│       │   │   │   │   │   ├── position_encoding.py
│       │   │   │   │   │   └── swin_transformer.py
│       │   │   │   │   ├── bertwarper.py
│       │   │   │   │   ├── csrc
│       │   │   │   │   │   ├── MsDeformAttn
│       │   │   │   │   │   │   ├── ms_deform_attn.h
│       │   │   │   │   │   │   ├── ms_deform_attn_cpu.cpp
│       │   │   │   │   │   │   ├── ms_deform_attn_cpu.h
│       │   │   │   │   │   │   ├── ms_deform_attn_cuda.cu
│       │   │   │   │   │   │   ├── ms_deform_attn_cuda.h
│       │   │   │   │   │   │   └── ms_deform_im2col_cuda.cuh
│       │   │   │   │   │   ├── cuda_version.cu
│       │   │   │   │   │   └── vision.cpp
│       │   │   │   │   ├── fuse_modules.py
│       │   │   │   │   ├── groundingdino.py
│       │   │   │   │   ├── ms_deform_attn.py
│       │   │   │   │   ├── transformer.py
│       │   │   │   │   ├── transformer_vanilla.py
│       │   │   │   │   └── utils.py
│       │   │   │   ├── __init__.py
│       │   │   │   └── registry.py
│       │   │   ├── util
│       │   │   │   ├── __init__.py
│       │   │   │   ├── box_ops.py
│       │   │   │   ├── get_tokenlizer.py
│       │   │   │   ├── inference.py
│       │   │   │   ├── logger.py
│       │   │   │   ├── misc.py
│       │   │   │   ├── slconfig.py
│       │   │   │   ├── slio.py
│       │   │   │   ├── time_counter.py
│       │   │   │   ├── utils.py
│       │   │   │   ├── visualizer.py
│       │   │   │   └── vl_utils.py
│       │   │   └── version.py
│       │   └── setup.py
│       ├── generation.py
│       ├── generation_mask.py
│       └── segment_anything_
│           ├── segment_anything
│           │   ├── __init__.py
│           │   ├── automatic_mask_generator.py
│           │   ├── build_sam.py
│           │   ├── modeling
│           │   │   ├── __init__.py
│           │   │   ├── common.py
│           │   │   ├── image_encoder.py
│           │   │   ├── mask_decoder.py
│           │   │   ├── prompt_encoder.py
│           │   │   ├── sam.py
│           │   │   └── transformer.py
│           │   ├── predictor.py
│           │   └── utils
│           │       ├── __init__.py
│           │       ├── amg.py
│           │       ├── onnx.py
│           │       └── transforms.py
│           └── setup.py
├── Method
│   ├── accelerators
│   │   ├── __init__.py
│   │   ├── accelerator.py
│   │   └── apex_ddp_accelerator.py
│   ├── config.yaml
│   ├── configs
│   │   ├── config_bert.json
│   │   ├── config_clipvitB.json
│   │   ├── config_roberta.json
│   │   ├── config_swinB_224.json
│   │   ├── config_swinB_384.json
│   │   ├── config_swinB_480.json
│   │   ├── re_bbox.yaml
│   │   └── vlue-base-test
│   │       ├── Grounding_bbox.yaml
│   │       ├── Grounding_weakly.yaml
│   │       ├── NLVR.yaml
│   │       ├── Retrieval.yaml
│   │       ├── VQA.yaml
│   │       └── VQA_480.yaml
│   ├── dataset
│   │   ├── __init__.py
│   │   ├── coco_karpathy_dataset.py
│   │   ├── dist_dataset.py
│   │   ├── randaugment.py
│   │   ├── re_bbox_dataset.py
│   │   └── utils.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── box_ops.py
│   │   ├── clip_vit.py
│   │   ├── model_re_bbox.py
│   │   ├── swin_transformer.py
│   │   ├── test.py
│   │   ├── tokenization_bert.py
│   │   ├── tokenization_roberta.py
│   │   ├── vit.py
│   │   ├── xbert.py
│   │   ├── xroberta.py
│   │   └── xvlm.py
│   ├── optim.py
│   ├── output
│   │   └── all_output_eva
│   │       └── config.yaml
│   ├── re_bbox.py
│   ├── requirements.txt
│   ├── run.py
│   ├── scheduler.py
│   └── utils
│       ├── __init__.py
│       ├── checkpointer.py
│       ├── cider
│       │   └── pyciderevalcap
│       │       ├── __init__.py
│       │       ├── cider
│       │       │   ├── __init__.py
│       │       │   ├── cider.py
│       │       │   └── cider_scorer.py
│       │       └── ciderD
│       │           ├── __init__.py
│       │           ├── ciderD.py
│       │           └── ciderD_scorer.py
│       ├── hdfs_io.py
│       └── torch_io.py
├── README.md
└── requirements.txt

--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/__init__.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import os
import sys

from omegaconf import OmegaConf

from minigpt4.common.registry import registry

from minigpt4.datasets.builders import *
from minigpt4.models import *
from minigpt4.processors import *
from minigpt4.tasks import *


root_dir = os.path.dirname(os.path.abspath(__file__))
default_cfg = OmegaConf.load(os.path.join(root_dir, "configs/default.yaml"))

registry.register_path("library_root", root_dir)
repo_root = os.path.join(root_dir, "..")
registry.register_path("repo_root", repo_root)
cache_root = os.path.join(repo_root, default_cfg.env.cache_root)
registry.register_path("cache_root", cache_root)

registry.register("MAX_INT", sys.maxsize)
registry.register("SPLIT_NAMES", ["train", "val", "test"])
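
For reference, a minimal sketch of how the values registered above are read back elsewhere in the package. It assumes the LAVIS-style registry used here exposes get_path and get as the counterparts of register_path and register:

from minigpt4.common.registry import registry

cache_root = registry.get_path("cache_root")    # resolved relative to repo_root above
max_int = registry.get("MAX_INT")               # sys.maxsize
split_names = registry.get("SPLIT_NAMES")       # ["train", "val", "test"]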
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase I/minigpt4/common/__init__.py
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/common/dist_utils.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import datetime
import functools
import os

import torch
import torch.distributed as dist
import timm.models.hub as timm_hub


def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__

    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()


def get_rank():
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()


def is_main_process():
    return get_rank() == 0


def init_distributed_mode(args):
    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ["WORLD_SIZE"])
        args.gpu = int(os.environ["LOCAL_RANK"])
    elif "SLURM_PROCID" in os.environ:
        args.rank = int(os.environ["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print("Not using distributed mode")
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"
    print(
        "| distributed init (rank {}, world {}): {}".format(
            args.rank, args.world_size, args.dist_url
        ),
        flush=True,
    )
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
        timeout=datetime.timedelta(
            days=365
        ),  # allow auto-downloading and de-compressing
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)


def get_dist_info():
    if torch.__version__ < "1.0":
        initialized = dist._initialized
    else:
        initialized = dist.is_initialized()
    if initialized:
        rank = dist.get_rank()
        world_size = dist.get_world_size()
    else:  # non-distributed training
        rank = 0
        world_size = 1
    return rank, world_size


def main_process(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        rank, _ = get_dist_info()
        if rank == 0:
            return func(*args, **kwargs)

    return wrapper


def download_cached_file(url, check_hash=True, progress=False):
    """
    Download a file from a URL and cache it locally. If the file already exists, it is not downloaded again.
    If distributed, only the main process downloads the file, and the other processes wait for the file to be downloaded.
    """

    def get_cached_file_path():
        # a hack to sync the file path across processes
        parts = torch.hub.urlparse(url)
        filename = os.path.basename(parts.path)
        cached_file = os.path.join(timm_hub.get_cache_dir(), filename)

        return cached_file

    if is_main_process():
        timm_hub.download_cached_file(url, check_hash, progress)

    if is_dist_avail_and_initialized():
        dist.barrier()

    return get_cached_file_path()
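
A minimal sketch of how init_distributed_mode is typically driven. Assumptions: an argparse namespace carrying a dist_url field, and a launcher such as torchrun that sets the RANK, WORLD_SIZE, and LOCAL_RANK environment variables the function reads:

import argparse

from minigpt4.common.dist_utils import get_world_size, init_distributed_mode, is_main_process

parser = argparse.ArgumentParser()
parser.add_argument("--dist_url", default="env://")
args = parser.parse_args()

init_distributed_mode(args)  # fills in args.rank, args.world_size, args.gpu, args.distributed
if is_main_process():
    print("initialized", get_world_size(), "processes")

# typically launched as: torchrun --nproc_per_node=4 your_script.py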
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/common/gradcam.py:
--------------------------------------------------------------------------------
import numpy as np
from matplotlib import pyplot as plt
from scipy.ndimage import filters
from skimage import transform as skimage_transform


def getAttMap(img, attMap, blur=True, overlap=True):
    attMap -= attMap.min()
    if attMap.max() > 0:
        attMap /= attMap.max()
    attMap = skimage_transform.resize(attMap, (img.shape[:2]), order=3, mode="constant")
    if blur:
        attMap = filters.gaussian_filter(attMap, 0.02 * max(img.shape[:2]))
        attMap -= attMap.min()
        attMap /= attMap.max()
    cmap = plt.get_cmap("jet")
    attMapV = cmap(attMap)
    attMapV = np.delete(attMapV, 3, 2)
    if overlap:
        attMap = (
            1 * (1 - attMap**0.7).reshape(attMap.shape + (1,)) * img
            + (attMap**0.7).reshape(attMap.shape + (1,)) * attMapV
        )
    return attMap
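
A usage sketch for getAttMap. Assumptions: the image is an HxWx3 float array in [0, 1], the attention map is 2-D, and the random arrays below are stand-ins for real model outputs:

import numpy as np
from matplotlib import pyplot as plt

from minigpt4.common.gradcam import getAttMap

img = np.random.rand(224, 224, 3)   # stand-in for a normalized RGB image
att = np.random.rand(14, 14)        # stand-in for a 14x14 attention map
overlay = getAttMap(img, att, blur=True, overlap=True)

plt.imshow(overlay)
plt.axis("off")
plt.show()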
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/common/optims.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import math

from minigpt4.common.registry import registry


@registry.register_lr_scheduler("linear_warmup_step_lr")
class LinearWarmupStepLRScheduler:
    def __init__(
        self,
        optimizer,
        max_epoch,
        min_lr,
        init_lr,
        decay_rate=1,
        warmup_start_lr=-1,
        warmup_steps=0,
        **kwargs
    ):
        self.optimizer = optimizer

        self.max_epoch = max_epoch
        self.min_lr = min_lr

        self.decay_rate = decay_rate

        self.init_lr = init_lr
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr

    def step(self, cur_epoch, cur_step):
        if cur_epoch == 0:
            warmup_lr_schedule(
                step=cur_step,
                optimizer=self.optimizer,
                max_step=self.warmup_steps,
                init_lr=self.warmup_start_lr,
                max_lr=self.init_lr,
            )
        else:
            step_lr_schedule(
                epoch=cur_epoch,
                optimizer=self.optimizer,
                init_lr=self.init_lr,
                min_lr=self.min_lr,
                decay_rate=self.decay_rate,
            )


@registry.register_lr_scheduler("linear_warmup_cosine_lr")
class LinearWarmupCosineLRScheduler:
    def __init__(
        self,
        optimizer,
        max_epoch,
        iters_per_epoch,
        min_lr,
        init_lr,
        warmup_steps=0,
        warmup_start_lr=-1,
        **kwargs
    ):
        self.optimizer = optimizer

        self.max_epoch = max_epoch
        self.iters_per_epoch = iters_per_epoch
        self.min_lr = min_lr

        self.init_lr = init_lr
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr

    def step(self, cur_epoch, cur_step):
        total_cur_step = cur_epoch * self.iters_per_epoch + cur_step
        if total_cur_step < self.warmup_steps:
            warmup_lr_schedule(
                step=cur_step,
                optimizer=self.optimizer,
                max_step=self.warmup_steps,
                init_lr=self.warmup_start_lr,
                max_lr=self.init_lr,
            )
        else:
            cosine_lr_schedule(
                epoch=total_cur_step,
                optimizer=self.optimizer,
                max_epoch=self.max_epoch * self.iters_per_epoch,
                init_lr=self.init_lr,
                min_lr=self.min_lr,
            )


def cosine_lr_schedule(optimizer, epoch, max_epoch, init_lr, min_lr):
    """Decay the learning rate"""
    lr = (init_lr - min_lr) * 0.5 * (
        1.0 + math.cos(math.pi * epoch / max_epoch)
    ) + min_lr
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr


def warmup_lr_schedule(optimizer, step, max_step, init_lr, max_lr):
    """Warmup the learning rate"""
    lr = min(max_lr, init_lr + (max_lr - init_lr) * step / max(max_step, 1))
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr


def step_lr_schedule(optimizer, epoch, init_lr, min_lr, decay_rate):
    """Decay the learning rate"""
    lr = max(min_lr, init_lr * (decay_rate**epoch))
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr
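
A sketch of stepping LinearWarmupCosineLRScheduler once per training iteration, matching the cur_epoch/cur_step signature above. The model, optimizer, and loop bounds are placeholders:

import torch

from minigpt4.common.optims import LinearWarmupCosineLRScheduler

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scheduler = LinearWarmupCosineLRScheduler(
    optimizer, max_epoch=10, iters_per_epoch=100,
    min_lr=1e-5, init_lr=1e-4, warmup_steps=200, warmup_start_lr=1e-6,
)

for epoch in range(10):
    for it in range(100):
        scheduler.step(cur_epoch=epoch, cur_step=it)  # writes param_group["lr"]
        # ... forward / backward ...
        optimizer.step()
        optimizer.zero_grad()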
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/datasets/cc_sbu/align.yaml:
--------------------------------------------------------------------------------
datasets:
  cc_sbu_align:
    data_type: images
    build_info:
      storage: /path/to/cc_sbu_align/
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/datasets/cc_sbu/defaults.yaml:
--------------------------------------------------------------------------------
datasets:
  cc_sbu:
    data_type: images
    build_info:
      storage: /path/to/cc_sbu_dataset/{00000..01255}.tar
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/datasets/laion/defaults.yaml:
--------------------------------------------------------------------------------
datasets:
  laion:
    data_type: images
    build_info:
      storage: /path/to/laion_dataset/{00000..10488}.tar
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/default.yaml:
--------------------------------------------------------------------------------
env:
  # For default users
  # cache_root: "cache"
  # For internal use with persistent storage
  cache_root: "/export/home/.cache/minigpt4"
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/models/minigpt4.yaml:
--------------------------------------------------------------------------------
model:
  arch: mini_gpt4

  # vit encoder
  image_size: 224
  drop_path_rate: 0
  use_grad_checkpoint: False
  vit_precision: "fp16"
  freeze_vit: True
  freeze_qformer: True

  # Q-Former
  num_query_token: 32

  # Vicuna
  llama_model: "/storage_fast/mchu/Multi-model/mini/MiniGPT-4/working"

  # generation configs
  prompt: ""

preprocess:
  vis_processor:
    train:
      name: "blip2_image_train"
      image_size: 224
    eval:
      name: "blip2_image_eval"
      image_size: 224
  text_processor:
    train:
      name: "blip_caption"
    eval:
      name: "blip_caption"
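
A quick sketch of reading one of these configs with OmegaConf, which the package already uses in its top-level __init__.py; the relative path is illustrative:

from omegaconf import OmegaConf

cfg = OmegaConf.load("minigpt4/configs/models/minigpt4.yaml")
print(cfg.model.arch)           # "mini_gpt4"
print(cfg.model.llama_model)    # filesystem path to the Vicuna weights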
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/conversation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase I/minigpt4/conversation/__init__.py
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase I/minigpt4/datasets/__init__.py
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/builders/__init__.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

from minigpt4.datasets.builders.base_dataset_builder import load_dataset_config
from minigpt4.datasets.builders.image_text_pair_builder import (
    CCSBUBuilder,
    LaionBuilder,
    CCSBUAlignBuilder
)
from minigpt4.common.registry import registry

__all__ = [
    "CCSBUBuilder",
    "LaionBuilder",
    "CCSBUAlignBuilder"
]


def load_dataset(name, cfg_path=None, vis_path=None, data_type=None):
    """
    Example

    >>> dataset = load_dataset("coco_caption", cfg=None)
    >>> splits = dataset.keys()
    >>> print([len(dataset[split]) for split in splits])

    """
    if cfg_path is None:
        cfg = None
    else:
        cfg = load_dataset_config(cfg_path)

    try:
        builder = registry.get_builder_class(name)(cfg)
    except TypeError:
        print(
            f"Dataset {name} not found. Available datasets:\n"
            + ", ".join([str(k) for k in dataset_zoo.get_names()])
        )
        exit(1)

    if vis_path is not None:
        if data_type is None:
            # use default data type in the config
            data_type = builder.config.data_type

        assert (
            data_type in builder.config.build_info
        ), f"Invalid data_type {data_type} for {name}."

        builder.config.build_info.get(data_type).storage = vis_path

    dataset = builder.build_datasets()
    return dataset


class DatasetZoo:
    def __init__(self) -> None:
        self.dataset_zoo = {
            k: list(v.DATASET_CONFIG_DICT.keys())
            for k, v in sorted(registry.mapping["builder_name_mapping"].items())
        }

    def get_names(self):
        return list(self.dataset_zoo.keys())


dataset_zoo = DatasetZoo()
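
A usage sketch for load_dataset. It assumes the builders registered in image_text_pair_builder.py (below) have been imported, which the star-imports in minigpt4/__init__.py take care of, and that the aligned data exists on disk:

from minigpt4.datasets.builders import load_dataset

dataset = load_dataset("cc_sbu_align")   # cfg_path=None falls back to the builder's default config
print(list(dataset.keys()))              # e.g. ["train"]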
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/builders/image_text_pair_builder.py:
--------------------------------------------------------------------------------
import os
import logging
import warnings

from minigpt4.common.registry import registry
from minigpt4.datasets.builders.base_dataset_builder import BaseDatasetBuilder
from minigpt4.datasets.datasets.laion_dataset import LaionDataset
from minigpt4.datasets.datasets.cc_sbu_dataset import CCSBUDataset, CCSBUAlignDataset


@registry.register_builder("cc_sbu")
class CCSBUBuilder(BaseDatasetBuilder):
    train_dataset_cls = CCSBUDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/cc_sbu/defaults.yaml"}

    def _download_ann(self):
        pass

    def _download_vis(self):
        pass

    def build(self):
        self.build_processors()

        build_info = self.config.build_info

        datasets = dict()
        split = "train"

        # create datasets
        # [NOTE] return inner_datasets (wds.DataPipeline)
        dataset_cls = self.train_dataset_cls
        datasets[split] = dataset_cls(
            vis_processor=self.vis_processors[split],
            text_processor=self.text_processors[split],
            location=build_info.storage,
        ).inner_dataset

        return datasets


@registry.register_builder("laion")
class LaionBuilder(BaseDatasetBuilder):
    train_dataset_cls = LaionDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/laion/defaults.yaml"}

    def _download_ann(self):
        pass

    def _download_vis(self):
        pass

    def build(self):
        self.build_processors()

        build_info = self.config.build_info

        datasets = dict()
        split = "train"

        # create datasets
        # [NOTE] return inner_datasets (wds.DataPipeline)
        dataset_cls = self.train_dataset_cls
        datasets[split] = dataset_cls(
            vis_processor=self.vis_processors[split],
            text_processor=self.text_processors[split],
            location=build_info.storage,
        ).inner_dataset

        return datasets


@registry.register_builder("cc_sbu_align")
class CCSBUAlignBuilder(BaseDatasetBuilder):
    train_dataset_cls = CCSBUAlignDataset

    DATASET_CONFIG_DICT = {
        "default": "configs/datasets/cc_sbu/align.yaml",
    }

    def build_datasets(self):
        # at this point, all the annotations and image/videos should be all downloaded to the specified locations.
        logging.info("Building datasets...")
        self.build_processors()

        build_info = self.config.build_info
        storage_path = build_info.storage

        datasets = dict()

        if not os.path.exists(storage_path):
            warnings.warn("storage path {} does not exist.".format(storage_path))

        # create datasets
        dataset_cls = self.train_dataset_cls
        datasets['train'] = dataset_cls(
            vis_processor=self.vis_processors["train"],
            text_processor=self.text_processors["train"],
            ann_paths=[os.path.join(storage_path, 'filter_cap.json')],
            vis_root=os.path.join(storage_path, 'image'),
        )

        return datasets
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/__init__.py
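
Equivalently, a builder defined above can be driven directly through the registry (a sketch; it assumes, as load_dataset does, that a builder accepts a None config and then falls back to its DATASET_CONFIG_DICT default):

from minigpt4.common.registry import registry

builder_cls = registry.get_builder_class("cc_sbu_align")
datasets = builder_cls(None).build_datasets()   # {"train": CCSBUAlignDataset}; needs the data on disk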
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/base_dataset.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import json
from typing import Iterable

from torch.utils.data import Dataset, ConcatDataset
from torch.utils.data.dataloader import default_collate


class BaseDataset(Dataset):
    def __init__(
        self, vis_processor=None, text_processor=None, vis_root=None, ann_paths=[]
    ):
        """
        vis_root (string): Root directory of images (e.g. coco/images/)
        ann_root (string): directory to store the annotation file
        """
        self.vis_root = vis_root

        self.annotation = []
        for ann_path in ann_paths:
            self.annotation.extend(json.load(open(ann_path, "r"))['annotations'])

        self.vis_processor = vis_processor
        self.text_processor = text_processor

        self._add_instance_ids()

    def __len__(self):
        return len(self.annotation)

    def collater(self, samples):
        return default_collate(samples)

    def set_processors(self, vis_processor, text_processor):
        self.vis_processor = vis_processor
        self.text_processor = text_processor

    def _add_instance_ids(self, key="instance_id"):
        for idx, ann in enumerate(self.annotation):
            ann[key] = str(idx)


class ConcatDataset(ConcatDataset):
    def __init__(self, datasets: Iterable[Dataset]) -> None:
        super().__init__(datasets)

    def collater(self, samples):
        # TODO For now only supports datasets with same underlying collater implementations

        all_keys = set()
        for s in samples:
            all_keys.update(s)

        shared_keys = all_keys
        for s in samples:
            shared_keys = shared_keys & set(s.keys())

        samples_shared_keys = []
        for s in samples:
            samples_shared_keys.append({k: s[k] for k in s.keys() if k in shared_keys})

        return self.datasets[0].collater(samples_shared_keys)
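
A small illustration of the shared-key collation implemented above. The toy dataset is a stand-in for two datasets whose samples expose different key sets:

import torch
from torch.utils.data.dataloader import default_collate

from minigpt4.datasets.datasets.base_dataset import ConcatDataset

class _Toy(torch.utils.data.Dataset):
    def __init__(self, rows):
        self.rows = rows
    def __len__(self):
        return len(self.rows)
    def __getitem__(self, i):
        return self.rows[i]
    def collater(self, samples):
        return default_collate(samples)

cat = ConcatDataset([_Toy([{"image": torch.zeros(3), "image_id": 0}]),
                     _Toy([{"image": torch.ones(3)}])])
batch = cat.collater([cat[0], cat[1]])
print(batch.keys())   # only the shared key survives: dict_keys(['image'])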
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/caption_datasets.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import os
from collections import OrderedDict

from minigpt4.datasets.datasets.base_dataset import BaseDataset
from PIL import Image


class __DisplMixin:
    def displ_item(self, index):
        sample, ann = self.__getitem__(index), self.annotation[index]

        return OrderedDict(
            {
                "file": ann["image"],
                "caption": ann["caption"],
                "image": sample["image"],
            }
        )


class CaptionDataset(BaseDataset, __DisplMixin):
    def __init__(self, vis_processor, text_processor, vis_root, ann_paths):
        """
        vis_root (string): Root directory of images (e.g. coco/images/)
        ann_root (string): directory to store the annotation file
        """
        super().__init__(vis_processor, text_processor, vis_root, ann_paths)

        self.img_ids = {}
        n = 0
        for ann in self.annotation:
            img_id = ann["image_id"]
            if img_id not in self.img_ids.keys():
                self.img_ids[img_id] = n
                n += 1
    def __getitem__(self, index):

        # TODO this assumes image input, not general enough
        ann = self.annotation[index]

        img_file = '{:0>12}.jpg'.format(ann["image_id"])
        image_path = os.path.join(self.vis_root, img_file)
        image = Image.open(image_path).convert("RGB")

        image = self.vis_processor(image)
        caption = self.text_processor(ann["caption"])

        return {
            "image": image,
            "text_input": caption,
            "image_id": self.img_ids[ann["image_id"]],
        }


class CaptionEvalDataset(BaseDataset, __DisplMixin):
    def __init__(self, vis_processor, text_processor, vis_root, ann_paths):
        """
        vis_root (string): Root directory of images (e.g. coco/images/)
        ann_root (string): directory to store the annotation file
        split (string): val or test
        """
        super().__init__(vis_processor, text_processor, vis_root, ann_paths)

    def __getitem__(self, index):

        ann = self.annotation[index]

        image_path = os.path.join(self.vis_root, ann["image"])
        image = Image.open(image_path).convert("RGB")

        image = self.vis_processor(image)

        return {
            "image": image,
            "image_id": ann["image_id"],
            "instance_id": ann["instance_id"],
        }
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/cc_sbu_dataset.py:
--------------------------------------------------------------------------------
import os
from PIL import Image
import webdataset as wds
from minigpt4.datasets.datasets.base_dataset import BaseDataset
from minigpt4.datasets.datasets.caption_datasets import CaptionDataset


class CCSBUDataset(BaseDataset):
    def __init__(self, vis_processor, text_processor, location):
        super().__init__(vis_processor=vis_processor, text_processor=text_processor)

        self.inner_dataset = wds.DataPipeline(
            wds.ResampledShards(location),
            wds.tarfile_to_samples(handler=wds.warn_and_continue),
            wds.shuffle(1000, handler=wds.warn_and_continue),
            wds.decode("pilrgb", handler=wds.warn_and_continue),
            wds.to_tuple("jpg", "json", handler=wds.warn_and_continue),
            wds.map_tuple(self.vis_processor, handler=wds.warn_and_continue),
            wds.map(self.to_dict, handler=wds.warn_and_continue),
        )

    def to_dict(self, sample):
        return {
            "image": sample[0],
            "text_input": self.text_processor(sample[1]["caption"]),
        }


class CCSBUAlignDataset(CaptionDataset):

    def __getitem__(self, index):

        # TODO this assumes image input, not general enough
        ann = self.annotation[index]

        img_file = '{}.jpg'.format(ann["image_id"])
        image_path = os.path.join(self.vis_root, img_file)
        image = Image.open(image_path).convert("RGB")

        image = self.vis_processor(image)
        caption = ann["caption"]

        return {
            "image": image,
            "text_input": caption,
            "image_id": self.img_ids[ann["image_id"]],
        }
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/laion_dataset.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import webdataset as wds
from minigpt4.datasets.datasets.base_dataset import BaseDataset


class LaionDataset(BaseDataset):
    def __init__(self, vis_processor, text_processor, location):
        super().__init__(vis_processor=vis_processor, text_processor=text_processor)

        self.inner_dataset = wds.DataPipeline(
            wds.ResampledShards(location),
            wds.tarfile_to_samples(handler=wds.warn_and_continue),
            wds.shuffle(1000, handler=wds.warn_and_continue),
            wds.decode("pilrgb", handler=wds.warn_and_continue),
            wds.to_tuple("jpg", "json", handler=wds.warn_and_continue),
            wds.map_tuple(self.vis_processor, handler=wds.warn_and_continue),
            wds.map(self.to_dict, handler=wds.warn_and_continue),
        )

    def to_dict(self, sample):
        return {
            "image": sample[0],
            "text_input": self.text_processor(sample[1]["caption"]),
        }
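
A sketch of consuming one of these webdataset pipelines. Assumptions: the shard pattern is the placeholder from the config above, the vis_processor returns a tensor so the default collate works, and batching is left to the DataLoader because wds.DataPipeline yields one sample at a time:

from torch.utils.data import DataLoader
from torchvision import transforms

from minigpt4.datasets.datasets.laion_dataset import LaionDataset

to_tensor = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
ds = LaionDataset(vis_processor=to_tensor, text_processor=lambda s: s,
                  location="/path/to/laion_dataset/{00000..10488}.tar")

loader = DataLoader(ds.inner_dataset, batch_size=32, num_workers=4)
batch = next(iter(loader))
images, texts = batch["image"], batch["text_input"]   # (32, 3, 224, 224) tensor, list of 32 strings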
/Annotation_Pipeline/Phase I/minigpt4/models/blip2_outputs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from dataclasses import dataclass 9 | from typing import Optional 10 | 11 | import torch 12 | from transformers.modeling_outputs import ( 13 | ModelOutput, 14 | BaseModelOutputWithPoolingAndCrossAttentions, 15 | CausalLMOutputWithCrossAttentions, 16 | ) 17 | 18 | 19 | @dataclass 20 | class BlipSimilarity(ModelOutput): 21 | sim_i2t: torch.FloatTensor = None 22 | sim_t2i: torch.FloatTensor = None 23 | 24 | sim_i2t_m: Optional[torch.FloatTensor] = None 25 | sim_t2i_m: Optional[torch.FloatTensor] = None 26 | 27 | sim_i2t_targets: Optional[torch.FloatTensor] = None 28 | sim_t2i_targets: Optional[torch.FloatTensor] = None 29 | 30 | 31 | @dataclass 32 | class BlipIntermediateOutput(ModelOutput): 33 | """ 34 | Data class for intermediate outputs of BLIP models. 35 | 36 | image_embeds (torch.FloatTensor): Image embeddings, shape (batch_size, num_patches, embed_dim). 37 | text_embeds (torch.FloatTensor): Text embeddings, shape (batch_size, seq_len, embed_dim). 38 | 39 | image_embeds_m (torch.FloatTensor): Image embeddings from momentum visual encoder, shape (batch_size, num_patches, embed_dim). 40 | text_embeds_m (torch.FloatTensor): Text embeddings from momentum text encoder, shape (batch_size, seq_len, embed_dim). 41 | 42 | encoder_output (BaseModelOutputWithPoolingAndCrossAttentions): output from the image-grounded text encoder. 43 | encoder_output_neg (BaseModelOutputWithPoolingAndCrossAttentions): output from the image-grounded text encoder for negative pairs. 44 | 45 | decoder_output (CausalLMOutputWithCrossAttentions): output from the image-grounded text decoder. 46 | decoder_labels (torch.LongTensor): labels for the captioning loss. 47 | 48 | itm_logits (torch.FloatTensor): logits for the image-text matching loss, shape (batch_size * 3, 2). 49 | itm_labels (torch.LongTensor): labels for the image-text matching loss, shape (batch_size * 3,) 50 | 51 | """ 52 | 53 | # uni-modal features 54 | image_embeds: torch.FloatTensor = None 55 | text_embeds: Optional[torch.FloatTensor] = None 56 | 57 | image_embeds_m: Optional[torch.FloatTensor] = None 58 | text_embeds_m: Optional[torch.FloatTensor] = None 59 | 60 | # intermediate outputs of multimodal encoder 61 | encoder_output: Optional[BaseModelOutputWithPoolingAndCrossAttentions] = None 62 | encoder_output_neg: Optional[BaseModelOutputWithPoolingAndCrossAttentions] = None 63 | 64 | itm_logits: Optional[torch.FloatTensor] = None 65 | itm_labels: Optional[torch.LongTensor] = None 66 | 67 | # intermediate outputs of multimodal decoder 68 | decoder_output: Optional[CausalLMOutputWithCrossAttentions] = None 69 | decoder_labels: Optional[torch.LongTensor] = None 70 | 71 | 72 | @dataclass 73 | class BlipOutput(ModelOutput): 74 | # some finetuned models (e.g. BlipVQA) do not compute similarity, thus optional.
75 | sims: Optional[BlipSimilarity] = None 76 | 77 | intermediate_output: BlipIntermediateOutput = None 78 | 79 | loss: Optional[torch.FloatTensor] = None 80 | 81 | loss_itc: Optional[torch.FloatTensor] = None 82 | 83 | loss_itm: Optional[torch.FloatTensor] = None 84 | 85 | loss_lm: Optional[torch.FloatTensor] = None 86 | 87 | 88 | @dataclass 89 | class BlipOutputFeatures(ModelOutput): 90 | """ 91 | Data class of features from BlipFeatureExtractor. 92 | 93 | Args: 94 | image_embeds: (torch.FloatTensor) of shape (batch_size, num_patches+1, embed_dim), optional 95 | image_features: (torch.FloatTensor) of shape (batch_size, num_patches+1, feature_dim), optional 96 | text_embeds: (torch.FloatTensor) of shape (batch_size, sequence_length+1, embed_dim), optional 97 | text_features: (torch.FloatTensor) of shape (batch_size, sequence_length+1, feature_dim), optional 98 | 99 | The first embedding or feature is for the [CLS] token. 100 | 101 | Features are obtained by projecting the corresponding embedding into a normalized low-dimensional space. 102 | """ 103 | 104 | image_embeds: Optional[torch.FloatTensor] = None 105 | image_embeds_proj: Optional[torch.FloatTensor] = None 106 | 107 | text_embeds: Optional[torch.FloatTensor] = None 108 | text_embeds_proj: Optional[torch.FloatTensor] = None 109 | 110 | multimodal_embeds: Optional[torch.FloatTensor] = None 111 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase I/minigpt4/processors/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from minigpt4.processors.base_processor import BaseProcessor 9 | from minigpt4.processors.blip_processors import ( 10 | Blip2ImageTrainProcessor, 11 | Blip2ImageEvalProcessor, 12 | BlipCaptionProcessor, 13 | ) 14 | 15 | from minigpt4.common.registry import registry 16 | 17 | __all__ = [ 18 | "BaseProcessor", 19 | "Blip2ImageTrainProcessor", 20 | "Blip2ImageEvalProcessor", 21 | "BlipCaptionProcessor", 22 | ] 23 | 24 | 25 | def load_processor(name, cfg=None): 26 | """ 27 | Example 28 | 29 | >>> processor = load_processor("alpro_video_train", cfg=None) 30 | """ 31 | processor = registry.get_processor_class(name).from_config(cfg) 32 | 33 | return processor 34 | --------------------------------------------------------------------------------
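load_processor resolves names through the shared registry; a short sketch, assuming "blip_caption" is the name under which BlipCaptionProcessor registers itself in blip_processors.py (not shown in this section):

from minigpt4.processors import load_processor

text_processor = load_processor("blip_caption", cfg=None)  # assumed registered name
print(text_processor("A bird sits on a branch."))

--------------------------------------------------------------------------------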
/Annotation_Pipeline/Phase I/minigpt4/processors/base_processor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved.
4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from omegaconf import OmegaConf 9 | 10 | 11 | class BaseProcessor: 12 | def __init__(self): 13 | self.transform = lambda x: x 14 | return 15 | 16 | def __call__(self, item): 17 | return self.transform(item) 18 | 19 | @classmethod 20 | def from_config(cls, cfg=None): 21 | return cls() 22 | 23 | def build(self, **kwargs): 24 | cfg = OmegaConf.create(kwargs) 25 | 26 | return self.from_config(cfg) 27 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase I/minigpt4/runners/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from minigpt4.runners.runner_base import RunnerBase 9 | 10 | __all__ = ["RunnerBase"] 11 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase I/minigpt4/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from minigpt4.common.registry import registry 9 | from minigpt4.tasks.base_task import BaseTask 10 | from minigpt4.tasks.image_text_pretrain import ImageTextPretrainTask 11 | 12 | 13 | def setup_task(cfg): 14 | assert "task" in cfg.run_cfg, "Task name must be provided."
15 | 16 | task_name = cfg.run_cfg.task 17 | task = registry.get_task_class(task_name).setup_task(cfg=cfg) 18 | assert task is not None, "Task {} not properly registered.".format(task_name) 19 | 20 | return task 21 | 22 | 23 | __all__ = [ 24 | "BaseTask", 25 | "ImageTextPretrainTask", 26 | ] 27 | --------------------------------------------------------------------------------
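setup_task only touches cfg.run_cfg.task, so any config object exposing that field will do; a minimal sketch using the task name registered below in image_text_pretrain.py (the Cfg stand-in is hypothetical, not the project's real config class):

from omegaconf import OmegaConf
from minigpt4.tasks import setup_task

class Cfg:
    # Stand-in for the full run config; only run_cfg.task is read here.
    run_cfg = OmegaConf.create({"task": "image_text_pretrain"})

task = setup_task(Cfg())  # resolves ImageTextPretrainTask via the registry

--------------------------------------------------------------------------------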
/Annotation_Pipeline/Phase I/minigpt4/tasks/image_text_pretrain.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from minigpt4.common.registry import registry 9 | from minigpt4.tasks.base_task import BaseTask 10 | 11 | 12 | @registry.register_task("image_text_pretrain") 13 | class ImageTextPretrainTask(BaseTask): 14 | def __init__(self): 15 | super().__init__() 16 | 17 | def evaluation(self, model, data_loader, cuda_enabled=True): 18 | pass 19 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/__init__.py -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py: -------------------------------------------------------------------------------- 1 | batch_size = 1 2 | modelname = "groundingdino" 3 | backbone = "swin_B_384_22k" 4 | position_embedding = "sine" 5 | pe_temperatureH = 20 6 | pe_temperatureW = 20 7 | return_interm_indices = [1, 2, 3] 8 | backbone_freeze_keywords = None 9 | enc_layers = 6 10 | dec_layers = 6 11 | pre_norm = False 12 | dim_feedforward = 2048 13 | hidden_dim = 256 14 | dropout = 0.0 15 | nheads = 8 16 | num_queries = 900 17 | query_dim = 4 18 | num_patterns = 0 19 | num_feature_levels = 4 20 | enc_n_points = 4 21 | dec_n_points = 4 22 | two_stage_type = "standard" 23 | 
two_stage_bbox_embed_share = False 24 | two_stage_class_embed_share = False 25 | transformer_activation = "relu" 26 | dec_pred_bbox_embed_share = True 27 | dn_box_noise_scale = 1.0 28 | dn_label_noise_ratio = 0.5 29 | dn_label_coef = 1.0 30 | dn_bbox_coef = 1.0 31 | embed_init_tgt = True 32 | dn_labelbook_size = 2000 33 | max_text_len = 256 34 | text_encoder_type = "bert-base-uncased" 35 | use_text_enhancer = True 36 | use_fusion_layer = True 37 | use_checkpoint = True 38 | use_transformer_ckpt = True 39 | use_text_cross_attention = True 40 | text_dropout = 0.0 41 | fusion_dropout = 0.0 42 | fusion_droppath = 0.1 43 | sub_sentence_present = True 44 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py: -------------------------------------------------------------------------------- 1 | batch_size = 1 2 | modelname = "groundingdino" 3 | backbone = "swin_T_224_1k" 4 | position_embedding = "sine" 5 | pe_temperatureH = 20 6 | pe_temperatureW = 20 7 | return_interm_indices = [1, 2, 3] 8 | backbone_freeze_keywords = None 9 | enc_layers = 6 10 | dec_layers = 6 11 | pre_norm = False 12 | dim_feedforward = 2048 13 | hidden_dim = 256 14 | dropout = 0.0 15 | nheads = 8 16 | num_queries = 900 17 | query_dim = 4 18 | num_patterns = 0 19 | num_feature_levels = 4 20 | enc_n_points = 4 21 | dec_n_points = 4 22 | two_stage_type = "standard" 23 | two_stage_bbox_embed_share = False 24 | two_stage_class_embed_share = False 25 | transformer_activation = "relu" 26 | dec_pred_bbox_embed_share = True 27 | dn_box_noise_scale = 1.0 28 | dn_label_noise_ratio = 0.5 29 | dn_label_coef = 1.0 30 | dn_bbox_coef = 1.0 31 | embed_init_tgt = True 32 | dn_labelbook_size = 2000 33 | max_text_len = 256 34 | text_encoder_type = "bert-base-uncased" 35 | use_text_enhancer = True 36 | use_fusion_layer = True 37 | use_checkpoint = True 38 | use_transformer_ckpt = True 39 | use_text_cross_attention = True 40 | text_dropout = 0.0 41 | fusion_dropout = 0.0 42 | fusion_droppath = 0.1 43 | sub_sentence_present = True 44 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/datasets/__init__.py --------------------------------------------------------------------------------
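Both config files are flat Python variable listings consumed as an args namespace; a sketch of turning one into a model, assuming the mmcv-style SLConfig.fromfile suggested by util/slconfig.py and a placeholder checkpoint path:

import torch
from groundingdino.util.slconfig import SLConfig
from groundingdino.models import build_model

args = SLConfig.fromfile("groundingdino/config/GroundingDINO_SwinT_OGC.py")
model = build_model(args)  # dispatches on args.modelname via MODULE_BUILD_FUNCS
checkpoint = torch.load("groundingdino_swint_ogc.pth", map_location="cpu")  # placeholder
model.load_state_dict(checkpoint["model"], strict=False)
model.eval()

--------------------------------------------------------------------------------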
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Conditional DETR 8 | # Copyright (c) 2021 Microsoft. All Rights Reserved. 9 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 10 | # ------------------------------------------------------------------------ 11 | # Copied from DETR (https://github.com/facebookresearch/detr) 12 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
13 | # ------------------------------------------------------------------------ 14 | 15 | from .groundingdino import build_groundingdino 16 | --------------------------------------------------------------------------------
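With build_groundingdino exported here, end-to-end grounding is typically driven through the helpers in groundingdino/util/inference.py; a sketch assuming those helpers match the upstream GroundingDINO API, with placeholder paths and thresholds:

from groundingdino.util.inference import load_model, load_image, predict

model = load_model(
    "groundingdino/config/GroundingDINO_SwinT_OGC.py",
    "groundingdino_swint_ogc.pth",  # placeholder checkpoint path
)
image_source, image = load_image("example.jpg")  # placeholder image
boxes, logits, phrases = predict(
    model=model,
    image=image,
    caption="a building with a red roof",
    box_threshold=0.35,
    text_threshold=0.25,
)

--------------------------------------------------------------------------------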
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_backbone 2 | --------------------------------------------------------------------------------
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | 13 | #include "ms_deform_attn_cpu.h" 14 | 15 | #ifdef WITH_CUDA 16 | #include "ms_deform_attn_cuda.h" 17 | #endif 18 | 19 | namespace groundingdino { 20 | 21 | at::Tensor 22 | ms_deform_attn_forward( 23 | const at::Tensor &value, 24 | const at::Tensor &spatial_shapes, 25 | const at::Tensor &level_start_index, 26 | const at::Tensor &sampling_loc, 27 | const at::Tensor &attn_weight, 28 | const int im2col_step) 29 | { 30 | if (value.type().is_cuda()) 31 | { 32 | #ifdef WITH_CUDA 33 | return ms_deform_attn_cuda_forward( 34 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 35 | #else 36 | AT_ERROR("Not compiled with GPU support"); 37 | #endif 38 | } 39 | AT_ERROR("Not implemented on the CPU"); 40 | } 41 | 42 | std::vector<at::Tensor> 43 | ms_deform_attn_backward( 44 | const at::Tensor &value, 45 | const at::Tensor &spatial_shapes, 46 | const at::Tensor &level_start_index, 47 | const at::Tensor &sampling_loc, 48 | const at::Tensor &attn_weight, 49 | const at::Tensor &grad_output, 50 | const int im2col_step) 51 | { 52 | if (value.type().is_cuda()) 53 | { 54 | #ifdef WITH_CUDA 55 | return ms_deform_attn_cuda_backward( 56 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 57 | #else 58 | AT_ERROR("Not compiled with GPU support"); 59 | #endif 60 | } 61 | AT_ERROR("Not implemented on the CPU"); 62 | } 63 | 64 | } // namespace groundingdino -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include <vector> 12 | 13 | #include <ATen/ATen.h> 14 | #include <ATen/cuda/CUDAContext.h> 15 | 16 | namespace groundingdino { 17 | 18 | at::Tensor 19 | ms_deform_attn_cpu_forward( 20 | const at::Tensor &value, 21 | const at::Tensor &spatial_shapes, 22 | const at::Tensor &level_start_index, 23 | const at::Tensor &sampling_loc, 24 | const at::Tensor &attn_weight, 25 | const int im2col_step) 26 | { 27 | AT_ERROR("Not implement on cpu"); 28 | } 29 | 30 | std::vector<at::Tensor> 31 | ms_deform_attn_cpu_backward( 32 | const at::Tensor &value, 33 | const at::Tensor &spatial_shapes, 34 | const at::Tensor &level_start_index, 35 | const at::Tensor &sampling_loc, 36 | const at::Tensor &attn_weight, 37 | const at::Tensor &grad_output, 38 | const int im2col_step) 39 | { 40 | AT_ERROR("Not implement on cpu"); 41 | } 42 | 43 | } // namespace groundingdino 44 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | namespace groundingdino { 15 | 16 | at::Tensor 17 | ms_deform_attn_cpu_forward( 18 | const at::Tensor &value, 19 | const at::Tensor &spatial_shapes, 20 | const at::Tensor &level_start_index, 21 | const at::Tensor &sampling_loc, 22 | const at::Tensor &attn_weight, 23 | const int im2col_step); 24 | 25 | std::vector<at::Tensor> 26 | ms_deform_attn_cpu_backward( 27 | const at::Tensor &value, 28 | const at::Tensor &spatial_shapes, 29 | const at::Tensor &level_start_index, 30 | const at::Tensor &sampling_loc, 31 | const at::Tensor &attn_weight, 32 | const at::Tensor &grad_output, 33 | const int im2col_step); 34 | 35 | } // namespace groundingdino 36 | --------------------------------------------------------------------------------
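The headers above fix the operator's calling convention; a sketch of invoking the compiled extension directly with dummy tensors whose shapes follow the Deformable-DETR convention this kernel assumes (CUDA only, per the dispatch in ms_deform_attn.h):

import torch
from groundingdino import _C  # compiled extension built by setup.py (WITH_CUDA)

N, n_heads, d_head, n_points, len_q = 1, 8, 32, 4, 100
shapes = torch.as_tensor([[32, 32], [16, 16]], dtype=torch.long, device="cuda")
start_idx = torch.cat((shapes.new_zeros(1), shapes.prod(1).cumsum(0)[:-1]))
len_in = int(shapes.prod(1).sum())  # 32*32 + 16*16 spatial tokens
n_levels = shapes.size(0)

value = torch.rand(N, len_in, n_heads, d_head, device="cuda")
sampling_loc = torch.rand(N, len_q, n_heads, n_levels, n_points, 2, device="cuda")
attn_weight = torch.rand(N, len_q, n_heads, n_levels, n_points, device="cuda")
attn_weight = attn_weight / attn_weight.sum(dim=(-1, -2), keepdim=True)  # normalize

out = _C.ms_deform_attn_forward(value, shapes, start_idx, sampling_loc, attn_weight, 64)
print(out.shape)  # (N, len_q, n_heads * d_head)

--------------------------------------------------------------------------------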
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | namespace groundingdino { 15 | 16 | at::Tensor ms_deform_attn_cuda_forward( 17 | const at::Tensor &value, 18 | const at::Tensor &spatial_shapes, 19 | const at::Tensor &level_start_index, 20 | const at::Tensor &sampling_loc, 21 | const at::Tensor &attn_weight, 22 | const int im2col_step); 23 | 24 | std::vector<at::Tensor> ms_deform_attn_cuda_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | 33 | } // namespace groundingdino -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include <cuda_runtime_api.h> 2 | 3 | namespace groundingdino { 4 | int get_cudart_version() { 5 | return CUDART_VERSION; 6 | } 7 | } // namespace groundingdino 8 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | #include "MsDeformAttn/ms_deform_attn.h" 4 | 5 | namespace groundingdino { 6 | 7 | #ifdef WITH_CUDA 8 | extern int get_cudart_version(); 9 | #endif 10 | 11 | std::string get_cuda_version() { 12 | #ifdef WITH_CUDA 13 | std::ostringstream oss; 14 | 15 | // copied from 16 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 17 | auto printCudaStyleVersion = [&](int v) { 18 | oss << (v / 1000) << "." << (v / 10 % 100); 19 | if (v % 10 != 0) { 20 | oss << "." << (v % 10); 21 | } 22 | }; 23 | printCudaStyleVersion(get_cudart_version()); 24 | return oss.str(); 25 | #else 26 | return std::string("not available"); 27 | #endif 28 | } 29 | 30 | // similar to 31 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 32 | std::string get_compiler_version() { 33 | std::ostringstream ss; 34 | #if defined(__GNUC__) 35 | #ifndef __clang__ 36 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 37 | #endif 38 | #endif 39 | 40 | #if defined(__clang_major__) 41 | { 42 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
43 | << __clang_patchlevel__; 44 | } 45 | #endif 46 | 47 | #if defined(_MSC_VER) 48 | { ss << "MSVC " << _MSC_FULL_VER; } 49 | #endif 50 | return ss.str(); 51 | } 52 | 53 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 54 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 55 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 56 | } 57 | 58 | } // namespace groundingdino -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/transformer_vanilla.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved 8 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 9 | """ 10 | DETR Transformer class. 11 | 12 | Copy-paste from torch.nn.Transformer with modifications: 13 | * positional encodings are passed in MHattention 14 | * extra LN at the end of encoder is removed 15 | * decoder returns a stack of activations from all decoding layers 16 | """ 17 | from typing import Optional 18 | 19 | import torch 20 | import torch.nn.functional as F 21 | from torch import Tensor, nn 22 | 23 | from .utils import ( 24 | MLP, 25 | _get_activation_fn, 26 | _get_clones, 27 | gen_encoder_output_proposals, 28 | gen_sineembed_for_position, 29 | sigmoid_focal_loss, 30 | ) 31 | 32 | 33 | class TextTransformer(nn.Module): 34 | def __init__(self, num_layers, d_model=256, nheads=8, dim_feedforward=2048, dropout=0.1): 35 | super().__init__() 36 | self.num_layers = num_layers 37 | self.d_model = d_model 38 | self.nheads = nheads 39 | self.dim_feedforward = dim_feedforward 40 | self.norm = None 41 | 42 | single_encoder_layer = TransformerEncoderLayer( 43 | d_model=d_model, nhead=nheads, dim_feedforward=dim_feedforward, dropout=dropout 44 | ) 45 | self.layers = _get_clones(single_encoder_layer, num_layers) 46 | 47 | def forward(self, memory_text: torch.Tensor, text_attention_mask: torch.Tensor): 48 | """ 49 | 50 | Args: 51 | text_attention_mask: bs, num_token 52 | memory_text: bs, num_token, d_model 53 | 54 | Returns: 55 | output: bs, num_token, d_model 56 | """ 57 | 58 | output = memory_text.transpose(0, 1) 59 | 60 | for layer in self.layers: 61 | output = layer(output, src_key_padding_mask=text_attention_mask) 62 | 63 | if self.norm is not None: 64 | output = self.norm(output) 65 | 66 | return output.transpose(0, 1) 67 | 68 | 69 | class TransformerEncoderLayer(nn.Module): 70 | def __init__( 71 | self, 72 | d_model, 73 | nhead, 74 | dim_feedforward=2048, 75 | dropout=0.1, 76 | activation="relu", 77 | normalize_before=False, 78 | ): 79 | super().__init__() 80 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 81 | # Implementation of Feedforward model 82 | self.linear1 = nn.Linear(d_model, dim_feedforward) 83 | self.dropout = nn.Dropout(dropout) 84 | self.linear2 = nn.Linear(dim_feedforward, d_model) 85 | 86 | self.norm1 = nn.LayerNorm(d_model) 87 | self.norm2 = 
nn.LayerNorm(d_model) 88 | self.dropout1 = nn.Dropout(dropout) 89 | self.dropout2 = nn.Dropout(dropout) 90 | 91 | self.activation = _get_activation_fn(activation) 92 | self.normalize_before = normalize_before 93 | self.nhead = nhead 94 | 95 | def with_pos_embed(self, tensor, pos: Optional[Tensor]): 96 | return tensor if pos is None else tensor + pos 97 | 98 | def forward( 99 | self, 100 | src, 101 | src_mask: Optional[Tensor] = None, 102 | src_key_padding_mask: Optional[Tensor] = None, 103 | pos: Optional[Tensor] = None, 104 | ): 105 | # repeat a per-sample 3-D attn mask once per head; guard against src_mask=None, which is what the TextTransformer call path passes 106 | if src_mask is not None and src_mask.dim() == 3 and src_mask.shape[0] == src.shape[1]: 107 | # bs, num_q, num_k 108 | src_mask = src_mask.repeat(self.nhead, 1, 1) 109 | 110 | q = k = self.with_pos_embed(src, pos) 111 | 112 | src2 = self.self_attn(q, k, value=src, attn_mask=src_mask)[0] 113 | 114 | # src2 = self.self_attn(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0] 115 | src = src + self.dropout1(src2) 116 | src = self.norm1(src) 117 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 118 | src = src + self.dropout2(src2) 119 | src = self.norm2(src) 120 | return src 121 | --------------------------------------------------------------------------------
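A small sketch exercising TextTransformer on dummy token features; shapes follow the docstring above, and note that with src_mask left as None the encoder layer applies no attention mask at all:

import torch
from groundingdino.models.GroundingDINO.transformer_vanilla import TextTransformer

model = TextTransformer(num_layers=2, d_model=256, nheads=8).eval()
memory_text = torch.rand(2, 16, 256)               # bs, num_token, d_model
text_mask = torch.zeros(2, 16, dtype=torch.bool)   # True would mark padding tokens
with torch.no_grad():
    out = model(memory_text, text_mask)
print(out.shape)  # torch.Size([2, 16, 256])

--------------------------------------------------------------------------------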
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | from .GroundingDINO import build_groundingdino 9 | 10 | 11 | def build_model(args): 12 | # we use register to maintain models from catdet6 on. 13 | from .registry import MODULE_BUILD_FUNCS 14 | 15 | assert args.modelname in MODULE_BUILD_FUNCS._module_dict 16 | build_func = MODULE_BUILD_FUNCS.get(args.modelname) 17 | model = build_func(args) 18 | return model 19 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/registry.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 
5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # -*- coding: utf-8 -*- 8 | # @Author: Yihao Chen 9 | # @Date: 2021-08-16 16:03:17 10 | # @Last Modified by: Shilong Liu 11 | # @Last Modified time: 2022-01-23 15:26 12 | # modified from mmcv 13 | 14 | import inspect 15 | from functools import partial 16 | 17 | 18 | class Registry(object): 19 | def __init__(self, name): 20 | self._name = name 21 | self._module_dict = dict() 22 | 23 | def __repr__(self): 24 | format_str = self.__class__.__name__ + "(name={}, items={})".format( 25 | self._name, list(self._module_dict.keys()) 26 | ) 27 | return format_str 28 | 29 | def __len__(self): 30 | return len(self._module_dict) 31 | 32 | @property 33 | def name(self): 34 | return self._name 35 | 36 | @property 37 | def module_dict(self): 38 | return self._module_dict 39 | 40 | def get(self, key): 41 | return self._module_dict.get(key, None) 42 | 43 | def registe_with_name(self, module_name=None, force=False): 44 | return partial(self.register, module_name=module_name, force=force) 45 | 46 | def register(self, module_build_function, module_name=None, force=False): 47 | """Register a module build function. 48 | Args: 49 | module_build_function (callable): build function to be registered. 50 | """ 51 | if not inspect.isfunction(module_build_function): 52 | raise TypeError( 53 | "module_build_function must be a function, but got {}".format( 54 | type(module_build_function) 55 | ) 56 | ) 57 | if module_name is None: 58 | module_name = module_build_function.__name__ 59 | if not force and module_name in self._module_dict: 60 | raise KeyError("{} is already registered in {}".format(module_name, self.name)) 61 | self._module_dict[module_name] = module_build_function 62 | 63 | return module_build_function 64 | 65 | 66 | MODULE_BUILD_FUNCS = Registry("model build functions") 67 | --------------------------------------------------------------------------------
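A minimal sketch of how MODULE_BUILD_FUNCS is meant to be used; the build function below is hypothetical (in this repo the real registration happens in models/GroundingDINO/groundingdino.py), and note the upstream method name is spelled "registe_with_name":

import torch.nn as nn
from groundingdino.models.registry import MODULE_BUILD_FUNCS

@MODULE_BUILD_FUNCS.registe_with_name(module_name="toy_model")  # hypothetical name
def build_toy_model(args):
    # A build function receives the args namespace and returns an nn.Module.
    return nn.Linear(args.hidden_dim, args.hidden_dim)

build_func = MODULE_BUILD_FUNCS.get("toy_model")

--------------------------------------------------------------------------------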
All Rights Reserved 2 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/box_ops.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/box_ops.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/box_ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/box_ops.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/get_tokenlizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/get_tokenlizer.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/get_tokenlizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/get_tokenlizer.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/inference.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/inference.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/inference.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/inference.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/misc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/misc.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/slconfig.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/slconfig.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/slconfig.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/slconfig.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/visualizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/visualizer.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase 
II/GroundingDINO/groundingdino/util/__pycache__/visualizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/visualizer.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/vl_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/vl_utils.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/vl_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/vl_utils.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Utilities for bounding box manipulation and GIoU. 4 | """ 5 | import torch 6 | from torchvision.ops.boxes import box_area 7 | 8 | 9 | def box_cxcywh_to_xyxy(x): 10 | x_c, y_c, w, h = x.unbind(-1) 11 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)] 12 | return torch.stack(b, dim=-1) 13 | 14 | 15 | def box_xyxy_to_cxcywh(x): 16 | x0, y0, x1, y1 = x.unbind(-1) 17 | b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)] 18 | return torch.stack(b, dim=-1) 19 | 20 | 21 | # modified from torchvision to also return the union 22 | def box_iou(boxes1, boxes2): 23 | area1 = box_area(boxes1) 24 | area2 = box_area(boxes2) 25 | 26 | # import ipdb; ipdb.set_trace() 27 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 28 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 29 | 30 | wh = (rb - lt).clamp(min=0) # [N,M,2] 31 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 32 | 33 | union = area1[:, None] + area2 - inter 34 | 35 | iou = inter / (union + 1e-6) 36 | return iou, union 37 | 38 | 39 | def generalized_box_iou(boxes1, boxes2): 40 | """ 41 | Generalized IoU from https://giou.stanford.edu/ 42 | 43 | The boxes should be in [x0, y0, x1, y1] format 44 | 45 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 46 | and M = len(boxes2) 47 | """ 48 | # degenerate boxes gives inf / nan results 49 | # so do an early check 50 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 51 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 52 | # except: 53 | # import ipdb; ipdb.set_trace() 54 | iou, union = box_iou(boxes1, boxes2) 55 | 56 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 57 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 58 | 59 | wh = (rb - lt).clamp(min=0) # [N,M,2] 60 | area = wh[:, :, 0] * wh[:, :, 1] 61 | 62 | return iou - (area - union) / (area + 1e-6) 63 | 64 | 65 | # modified from torchvision to also return the union 66 | def box_iou_pairwise(boxes1, boxes2): 67 | 
area1 = box_area(boxes1) 68 | area2 = box_area(boxes2) 69 | 70 | lt = torch.max(boxes1[:, :2], boxes2[:, :2]) # [N,2] 71 | rb = torch.min(boxes1[:, 2:], boxes2[:, 2:]) # [N,2] 72 | 73 | wh = (rb - lt).clamp(min=0) # [N,2] 74 | inter = wh[:, 0] * wh[:, 1] # [N] 75 | 76 | union = area1 + area2 - inter 77 | 78 | iou = inter / union 79 | return iou, union 80 | 81 | 82 | def generalized_box_iou_pairwise(boxes1, boxes2): 83 | """ 84 | Generalized IoU from https://giou.stanford.edu/ 85 | 86 | Input: 87 | - boxes1, boxes2: N,4 88 | Output: 89 | - giou: N 90 | """ 91 | # degenerate boxes give inf / nan results 92 | # so do an early check 93 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 94 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 95 | assert boxes1.shape == boxes2.shape 96 | iou, union = box_iou_pairwise(boxes1, boxes2) # [N] 97 | 98 | lt = torch.min(boxes1[:, :2], boxes2[:, :2]) 99 | rb = torch.max(boxes1[:, 2:], boxes2[:, 2:]) 100 | 101 | wh = (rb - lt).clamp(min=0) # [N,2] 102 | area = wh[:, 0] * wh[:, 1] 103 | 104 | return iou - (area - union) / area 105 | 106 | 107 | def masks_to_boxes(masks): 108 | """Compute the bounding boxes around the provided masks 109 | 110 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 111 | 112 | Returns a [N, 4] tensor, with the boxes in xyxy format 113 | """ 114 | if masks.numel() == 0: 115 | return torch.zeros((0, 4), device=masks.device) 116 | 117 | h, w = masks.shape[-2:] 118 | 119 | y = torch.arange(0, h, dtype=torch.float) 120 | x = torch.arange(0, w, dtype=torch.float) 121 | y, x = torch.meshgrid(y, x) 122 | 123 | x_mask = masks * x.unsqueeze(0) 124 | x_max = x_mask.flatten(1).max(-1)[0] 125 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 126 | 127 | y_mask = masks * y.unsqueeze(0) 128 | y_max = y_mask.flatten(1).max(-1)[0] 129 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 130 | 131 | return torch.stack([x_min, y_min, x_max, y_max], 1) 132 | 133 | 134 | if __name__ == "__main__": 135 | x = torch.rand(5, 4) 136 | y = torch.rand(3, 4) 137 | iou, union = box_iou(x, y) 138 | import ipdb 139 | 140 | ipdb.set_trace() 141 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/get_tokenlizer.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer, BertModel, BertTokenizer, RobertaModel, RobertaTokenizerFast 2 | 3 | 4 | def get_tokenlizer(text_encoder_type): 5 | if not isinstance(text_encoder_type, str): 6 | # print("text_encoder_type is not a str") 7 | if hasattr(text_encoder_type, "text_encoder_type"): 8 | text_encoder_type = text_encoder_type.text_encoder_type 9 | elif text_encoder_type.get("text_encoder_type", False): 10 | text_encoder_type = text_encoder_type.get("text_encoder_type") 11 | else: 12 | raise ValueError( 13 | "Unknown type of text_encoder_type: {}".format(type(text_encoder_type)) 14 | ) 15 | print("final text_encoder_type: {}".format(text_encoder_type)) 16 | 17 | tokenizer = AutoTokenizer.from_pretrained(text_encoder_type) 18 | return tokenizer 19 | 20 | 21 | def get_pretrained_language_model(text_encoder_type): 22 | if text_encoder_type == "bert-base-uncased": 23 | return BertModel.from_pretrained(text_encoder_type) 24 | if text_encoder_type == "roberta-base": 25 | return RobertaModel.from_pretrained(text_encoder_type) 26 | raise ValueError("Unknown
text_encoder_type {}".format(text_encoder_type)) 27 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import functools 3 | import logging 4 | import os 5 | import sys 6 | 7 | from termcolor import colored 8 | 9 | 10 | class _ColorfulFormatter(logging.Formatter): 11 | def __init__(self, *args, **kwargs): 12 | self._root_name = kwargs.pop("root_name") + "." 13 | self._abbrev_name = kwargs.pop("abbrev_name", "") 14 | if len(self._abbrev_name): 15 | self._abbrev_name = self._abbrev_name + "." 16 | super(_ColorfulFormatter, self).__init__(*args, **kwargs) 17 | 18 | def formatMessage(self, record): 19 | record.name = record.name.replace(self._root_name, self._abbrev_name) 20 | log = super(_ColorfulFormatter, self).formatMessage(record) 21 | if record.levelno == logging.WARNING: 22 | prefix = colored("WARNING", "red", attrs=["blink"]) 23 | elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: 24 | prefix = colored("ERROR", "red", attrs=["blink", "underline"]) 25 | else: 26 | return log 27 | return prefix + " " + log 28 | 29 | 30 | # so that calling setup_logger multiple times won't add many handlers 31 | @functools.lru_cache() 32 | def setup_logger(output=None, distributed_rank=0, *, color=True, name="imagenet", abbrev_name=None): 33 | """ 34 | Initialize the detectron2 logger and set its verbosity level to "INFO". 35 | 36 | Args: 37 | output (str): a file name or a directory to save log. If None, will not save log file. 38 | If ends with ".txt" or ".log", assumed to be a file name. 39 | Otherwise, logs will be saved to `output/log.txt`. 40 | name (str): the root module name of this logger 41 | 42 | Returns: 43 | logging.Logger: a logger 44 | """ 45 | logger = logging.getLogger(name) 46 | logger.setLevel(logging.DEBUG) 47 | logger.propagate = False 48 | 49 | if abbrev_name is None: 50 | abbrev_name = name 51 | 52 | plain_formatter = logging.Formatter( 53 | "[%(asctime)s.%(msecs)03d]: %(message)s", datefmt="%m/%d %H:%M:%S" 54 | ) 55 | # stdout logging: master only 56 | if distributed_rank == 0: 57 | ch = logging.StreamHandler(stream=sys.stdout) 58 | ch.setLevel(logging.DEBUG) 59 | if color: 60 | formatter = _ColorfulFormatter( 61 | colored("[%(asctime)s.%(msecs)03d]: ", "green") + "%(message)s", 62 | datefmt="%m/%d %H:%M:%S", 63 | root_name=name, 64 | abbrev_name=str(abbrev_name), 65 | ) 66 | else: 67 | formatter = plain_formatter 68 | ch.setFormatter(formatter) 69 | logger.addHandler(ch) 70 | 71 | # file logging: all workers 72 | if output is not None: 73 | if output.endswith(".txt") or output.endswith(".log"): 74 | filename = output 75 | else: 76 | filename = os.path.join(output, "log.txt") 77 | if distributed_rank > 0: 78 | filename = filename + f".rank{distributed_rank}" 79 | os.makedirs(os.path.dirname(filename), exist_ok=True) 80 | 81 | fh = logging.StreamHandler(_cached_log_stream(filename)) 82 | fh.setLevel(logging.DEBUG) 83 | fh.setFormatter(plain_formatter) 84 | logger.addHandler(fh) 85 | 86 | return logger 87 | 88 | 89 | # cache the opened file object, so that different calls to `setup_logger` 90 | # with the same file name can safely write to the same file. 
91 | @functools.lru_cache(maxsize=None) 92 | def _cached_log_stream(filename): 93 | return open(filename, "a") 94 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/time_counter.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | 4 | 5 | class TimeCounter: 6 | def __init__(self) -> None: 7 | self.clear()  # ensure timedict and basetime exist before the first timeit() 8 | 9 | def clear(self): 10 | self.timedict = {} 11 | self.basetime = time.perf_counter() 12 | 13 | def timeit(self, name): 14 | nowtime = time.perf_counter() - self.basetime 15 | self.timedict[name] = nowtime 16 | self.basetime = time.perf_counter() 17 | 18 | 19 | class TimeHolder: 20 | def __init__(self) -> None: 21 | self.timedict = {} 22 | 23 | def update(self, _timedict: dict): 24 | for k, v in _timedict.items(): 25 | if k not in self.timedict: 26 | self.timedict[k] = AverageMeter(name=k, val_only=True) 27 | self.timedict[k].update(val=v) 28 | 29 | def final_res(self): 30 | return {k: v.avg for k, v in self.timedict.items()} 31 | 32 | def __str__(self): 33 | return json.dumps(self.final_res(), indent=2) 34 | 35 | 36 | class AverageMeter(object): 37 | """Computes and stores the average and current value""" 38 | 39 | def __init__(self, name, fmt=":f", val_only=False): 40 | self.name = name 41 | self.fmt = fmt 42 | self.val_only = val_only 43 | self.reset() 44 | 45 | def reset(self): 46 | self.val = 0 47 | self.avg = 0 48 | self.sum = 0 49 | self.count = 0 50 | 51 | def update(self, val, n=1): 52 | self.val = val 53 | self.sum += val * n 54 | self.count += n 55 | self.avg = self.sum / self.count 56 | 57 | def __str__(self): 58 | if self.val_only: 59 | fmtstr = "{name} {val" + self.fmt + "}" 60 | else: 61 | fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" 62 | return fmtstr.format(**self.__dict__) 63 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/vl_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from typing import List 4 | 5 | import torch 6 | 7 | 8 | def create_positive_map_from_span(tokenized, token_span, max_text_len=256): 9 | """construct a map such that positive_map[i,j] = True iff box i is associated to token j 10 | Input: 11 | - tokenized: 12 | - input_ids: Tensor[1, ntokens] 13 | - attention_mask: Tensor[1, ntokens] 14 | - token_span: list with length num_boxes.
15 | - each item: [start_idx, end_idx] 16 | """ 17 | positive_map = torch.zeros((len(token_span), max_text_len), dtype=torch.float) 18 | for j, tok_list in enumerate(token_span): 19 | for (beg, end) in tok_list: 20 | beg_pos = tokenized.char_to_token(beg) 21 | end_pos = tokenized.char_to_token(end - 1) 22 | if beg_pos is None: 23 | try: 24 | beg_pos = tokenized.char_to_token(beg + 1) 25 | if beg_pos is None: 26 | beg_pos = tokenized.char_to_token(beg + 2) 27 | except Exception: 28 | beg_pos = None 29 | if end_pos is None: 30 | try: 31 | end_pos = tokenized.char_to_token(end - 2) 32 | if end_pos is None: 33 | end_pos = tokenized.char_to_token(end - 3) 34 | except Exception: 35 | end_pos = None 36 | if beg_pos is None or end_pos is None: 37 | continue 38 | 39 | assert beg_pos is not None and end_pos is not None 40 | if os.environ.get("SHILONG_DEBUG_ONLY_ONE_POS", None) == "TRUE": 41 | positive_map[j, beg_pos] = 1 42 | break 43 | else: 44 | positive_map[j, beg_pos : end_pos + 1].fill_(1) 45 | 46 | return positive_map / (positive_map.sum(-1)[:, None] + 1e-6) 47 | 48 | 49 | def build_captions_and_token_span(cat_list, force_lowercase): 50 | """ 51 | Return: 52 | captions: str 53 | cat2tokenspan: dict 54 | { 55 | 'dog': [[0, 2]], 56 | ... 57 | } 58 | """ 59 | 60 | cat2tokenspan = {} 61 | captions = "" 62 | for catname in cat_list: 63 | class_name = catname 64 | if force_lowercase: 65 | class_name = class_name.lower() 66 | if "/" in class_name: 67 | class_name_list: List = class_name.strip().split("/") 68 | class_name_list.append(class_name) 69 | class_name: str = random.choice(class_name_list) 70 | 71 | tokens_positive_i = [] 72 | subnamelist = [i.strip() for i in class_name.strip().split(" ")] 73 | for subname in subnamelist: 74 | if len(subname) == 0: 75 | continue 76 | if len(captions) > 0: 77 | captions = captions + " " 78 | start_idx = len(captions) 79 | end_idx = start_idx + len(subname) 80 | tokens_positive_i.append([start_idx, end_idx]) 81 | captions = captions + subname 82 | 83 | if len(tokens_positive_i) > 0: 84 | captions = captions + " ." 85 | cat2tokenspan[class_name] = tokens_positive_i 86 | 87 | return captions, cat2tokenspan 88 | 89 | 90 | def build_id2posspan_and_caption(category_dict: list): 91 | """Build id2pos_span and caption from category_dict 92 | 93 | Args: 94 | category_dict: list of COCO-style category dicts with 'id' and 'name' keys 95 | """ 96 | cat_list = [item["name"].lower() for item in category_dict] 97 | id2catname = {item["id"]: item["name"].lower() for item in category_dict} 98 | caption, cat2posspan = build_captions_and_token_span(cat_list, force_lowercase=True) 99 | id2posspan = {catid: cat2posspan[catname] for catid, catname in id2catname.items()} 100 | return id2posspan, caption 101 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0' 2 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
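# A minimal usage sketch of this package's two entry points (the checkpoint
# filename and the `image`/`box` arrays are illustrative, not shipped here):
#
#   from segment_anything import sam_model_registry, SamPredictor
#
#   sam = sam_model_registry["vit_h"](checkpoint="sam_vit_h.pth")
#   predictor = SamPredictor(sam)
#   predictor.set_image(image)                       # HxWxC uint8 RGB array
#   masks, scores, _ = predictor.predict(box=box)    # box as xyxy pixel coords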
6 | 7 | from .build_sam import ( 8 | build_sam, 9 | build_sam_vit_h, 10 | build_sam_vit_l, 11 | build_sam_vit_b, 12 | sam_model_registry, 13 | ) 14 | from .predictor import SamPredictor 15 | from .automatic_mask_generator import SamAutomaticMaskGenerator 16 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/automatic_mask_generator.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/automatic_mask_generator.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/automatic_mask_generator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/automatic_mask_generator.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/build_sam.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/build_sam.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/build_sam.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/build_sam.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/predictor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase 
II/segment_anything_/segment_anything/__pycache__/predictor.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/predictor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/predictor.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/build_sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam = build_sam_vit_h 25 | 26 | 27 | def build_sam_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_model_registry = { 48 | "default": build_sam, 49 | "vit_h": build_sam, 50 | "vit_l": build_sam_vit_l, 51 | "vit_b": build_sam_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoder( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | ), 99 | pixel_mean=[123.675, 116.28, 103.53], 100 | pixel_std=[58.395, 57.12, 57.375], 101 | ) 102 | sam.eval() 103 | if checkpoint is not None: 104 | with open(checkpoint, "rb") as f: 105 | state_dict = torch.load(f) 106 | 
sam.load_state_dict(state_dict) 107 | return sam 108 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .sam import Sam 8 | from .image_encoder import ImageEncoderViT 9 | from .mask_decoder import MaskDecoder 10 | from .prompt_encoder import PromptEncoder 11 | from .transformer import TwoWayTransformer 12 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/common.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/common.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/common.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/common.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/image_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/image_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/image_encoder.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/image_encoder.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/mask_decoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/mask_decoder.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/mask_decoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/mask_decoder.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/prompt_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/prompt_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/prompt_encoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/prompt_encoder.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/sam.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/sam.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/sam.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/sam.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase 
II/segment_anything_/segment_anything/modeling/__pycache__/transformer.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/transformer.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x 44 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/amg.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/amg.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/amg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/amg.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/transforms.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/transforms.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/transforms.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/transforms.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
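# A short usage sketch for the ResizeLongestSide transform defined below
# (the `image` and `boxes` arrays are illustrative):
#
#   transform = ResizeLongestSide(target_length=1024)
#   resized = transform.apply_image(image)                   # HxWxC uint8
#   boxes_t = transform.apply_boxes(boxes, image.shape[:2])  # Bx4 xyxy
#
# Coordinates are rescaled by the same per-axis factors as the image, so
# boxes and points stay aligned with the resized pixels.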
6 | 7 | import numpy as np 8 | import torch 9 | from torch.nn import functional as F 10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore 11 | 12 | from copy import deepcopy 13 | from typing import Tuple 14 | 15 | 16 | class ResizeLongestSide: 17 | """ 18 | Resizes images to longest side 'target_length', as well as provides 19 | methods for resizing coordinates and boxes. Provides methods for 20 | transforming both numpy array and batched torch tensors. 21 | """ 22 | 23 | def __init__(self, target_length: int) -> None: 24 | self.target_length = target_length 25 | 26 | def apply_image(self, image: np.ndarray) -> np.ndarray: 27 | """ 28 | Expects a numpy array with shape HxWxC in uint8 format. 29 | """ 30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 31 | return np.array(resize(to_pil_image(image), target_size)) 32 | 33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 34 | """ 35 | Expects a numpy array of length 2 in the final dimension. Requires the 36 | original image size in (H, W) format. 37 | """ 38 | old_h, old_w = original_size 39 | new_h, new_w = self.get_preprocess_shape( 40 | original_size[0], original_size[1], self.target_length 41 | ) 42 | coords = deepcopy(coords).astype(float) 43 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 44 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 45 | return coords 46 | 47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 48 | """ 49 | Expects a numpy array shape Bx4. Requires the original image size 50 | in (H, W) format. 51 | """ 52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) 53 | return boxes.reshape(-1, 4) 54 | 55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: 56 | """ 57 | Expects batched images with shape BxCxHxW and float format. This 58 | transformation may not exactly match apply_image. apply_image is 59 | the transformation expected by the model. 60 | """ 61 | # Expects an image in BCHW format. May not exactly match apply_image. 62 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 63 | return F.interpolate( 64 | image, target_size, mode="bilinear", align_corners=False, antialias=True 65 | ) 66 | 67 | def apply_coords_torch( 68 | self, coords: torch.Tensor, original_size: Tuple[int, ...] 69 | ) -> torch.Tensor: 70 | """ 71 | Expects a torch tensor with length 2 in the last dimension. Requires the 72 | original image size in (H, W) format. 73 | """ 74 | old_h, old_w = original_size 75 | new_h, new_w = self.get_preprocess_shape( 76 | original_size[0], original_size[1], self.target_length 77 | ) 78 | coords = deepcopy(coords).to(torch.float) 79 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 80 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 81 | return coords 82 | 83 | def apply_boxes_torch( 84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...] 85 | ) -> torch.Tensor: 86 | """ 87 | Expects a torch tensor with shape Bx4. Requires the original image 88 | size in (H, W) format. 89 | """ 90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) 91 | return boxes.reshape(-1, 4) 92 | 93 | @staticmethod 94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: 95 | """ 96 | Compute the output size given input size and target long side length. 
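For example, oldh=480, oldw=640 with long_side_length=1024 gives
scale=1.6, hence an output of (newh, neww) = (768, 1024).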
97 | """ 98 | scale = long_side_length * 1.0 / max(oldh, oldw) 99 | newh, neww = oldh * scale, oldw * scale 100 | neww = int(neww + 0.5) 101 | newh = int(newh + 0.5) 102 | return (newh, neww) 103 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import find_packages, setup 8 | 9 | setup( 10 | name="segment_anything", 11 | version="1.0", 12 | install_requires=[], 13 | packages=find_packages(exclude="notebooks"), 14 | extras_require={ 15 | "all": ["matplotlib", "pycocotools", "opencv-python", "onnx", "onnxruntime"], 16 | "dev": ["flake8", "isort", "black", "mypy"], 17 | }, 18 | ) 19 | -------------------------------------------------------------------------------- /Method/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/.DS_Store -------------------------------------------------------------------------------- /Method/__pycache__/openai.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/openai.cpython-38.pyc -------------------------------------------------------------------------------- /Method/__pycache__/optim.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/optim.cpython-37.pyc -------------------------------------------------------------------------------- /Method/__pycache__/optim.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/optim.cpython-38.pyc -------------------------------------------------------------------------------- /Method/__pycache__/scheduler.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/scheduler.cpython-37.pyc -------------------------------------------------------------------------------- /Method/__pycache__/scheduler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/scheduler.cpython-38.pyc -------------------------------------------------------------------------------- /Method/accelerators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/accelerators/__init__.py -------------------------------------------------------------------------------- /Method/accelerators/__pycache__/__init__.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/accelerators/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Method/accelerators/__pycache__/accelerator.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/accelerators/__pycache__/accelerator.cpython-37.pyc -------------------------------------------------------------------------------- /Method/accelerators/__pycache__/apex_ddp_accelerator.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/accelerators/__pycache__/apex_ddp_accelerator.cpython-37.pyc -------------------------------------------------------------------------------- /Method/accelerators/accelerator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276) 3 | # Github: https://github.com/zengyan-97/X-VLM 4 | # Copyright (c) 2022, ByteDance Inc. 5 | # All rights reserved. 6 | 7 | from logging import Logger 8 | 9 | import torch 10 | from torch.optim import Optimizer 11 | 12 | Net = torch.nn.Module 13 | 14 | 15 | class Accelerator: 16 | def __init__(self, cfg, logger) -> None: 17 | self.cfg = cfg 18 | self.logger = logger 19 | 20 | def set_up(self, model: Net): 21 | raise NotImplementedError("Set Up method not implement in Accelerator, please check! ") 22 | 23 | def broadcast(self): 24 | raise NotImplementedError("Broadcast method not implement in Accelerator, please check! ") 25 | 26 | def backward_step(self, loss: torch.Tensor): 27 | loss.backward() 28 | 29 | def optimizer_step(self, optimizer: Optimizer, model: Net, grad_norm: float) -> float: 30 | total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 31 | grad_norm) 32 | return float(total_norm) 33 | -------------------------------------------------------------------------------- /Method/accelerators/apex_ddp_accelerator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276) 3 | # Github: https://github.com/zengyan-97/X-VLM 4 | # Copyright (c) 2022, ByteDance Inc. 5 | # All rights reserved. 6 | 7 | import os 8 | import random 9 | import sys 10 | from typing import Tuple, Union, Optional, Any 11 | import numpy as np 12 | 13 | import torch 14 | import torch.distributed as distributed 15 | from torch.optim import Optimizer 16 | from torch.optim.lr_scheduler import LambdaLR 17 | 18 | Net = torch.nn.Module 19 | 20 | from .accelerator import Accelerator 21 | 22 | # try: 23 | from apex import amp 24 | from apex.parallel import DistributedDataParallel as Apex_DDP 25 | from apex.parallel import convert_syncbn_model 26 | # except ImportError: 27 | # print('no apex! 
Please install from https://www.github.com/nvidia/apex') 28 | 29 | 30 | class ApexDDPAccelerator(Accelerator): 31 | """ 32 | ApexDDPAccelerator, use apex DistributedDataParallel 33 | """ 34 | 35 | def __init__(self, cfg, logger): 36 | super().__init__(cfg, logger) 37 | self.accelerator_rng_seed = self.cfg.RNG_SEED 38 | self.accelerator_syncbn = self.cfg.SYNCBN 39 | self.accelerator_fp16_opt_level = self.cfg.FP16_OPT_LEVEL 40 | self.accelerator_fp16_loss_scale = self.cfg.FP16_LOSS_SCALE 41 | 42 | def set_up(self, model: Net, optimizer: Optimizer, lr_scheduler: LambdaLR, 43 | local_rank: int, world_size: int, rank: int) -> Tuple[Apex_DDP, Optimizer, LambdaLR]: 44 | """ 45 | set up ApexDDPAccelerator, including process_group and apex_ddp 46 | """ 47 | torch.backends.cudnn.benchmark = False 48 | random.seed(self.accelerator_rng_seed) 49 | np.random.seed(self.accelerator_rng_seed) 50 | torch.random.manual_seed(self.accelerator_rng_seed) 51 | torch.cuda.manual_seed_all(self.accelerator_rng_seed) 52 | master_address = os.environ.get('MASTER_ADDR', "127.0.0.1") 53 | master_port = int(os.environ.get('MASTER_PORT', 34171)) 54 | 55 | torch.cuda.set_device(local_rank) 56 | model = model.cuda() 57 | if not torch.distributed.is_initialized(): 58 | distributed.init_process_group( 59 | backend='nccl', 60 | init_method='tcp://{}:{}'.format(master_address, master_port), 61 | world_size=world_size, 62 | rank=rank, 63 | group_name='mtorch') 64 | print( 65 | f'ApexDDPAccelerator distributed, size: {world_size}, rank: {rank}, local rank: {local_rank}') 66 | sys.stdout.flush() 67 | 68 | self.broadcast(model) 69 | apex_model, optimizer = self.configure_ddp(model, optimizer) 70 | 71 | if self.accelerator_syncbn: 72 | apex_model = self.configure_sync_batchnorm(apex_model) 73 | return apex_model, optimizer, lr_scheduler 74 | 75 | def broadcast(self, model: Net, src=0) -> None: 76 | for v in model.state_dict().values(): 77 | distributed.broadcast(v, src) 78 | 79 | def configure_ddp(self, model: Net, optimizer: Optimizer) -> Tuple[Apex_DDP, Optimizer]: 80 | model, optimizer = amp.initialize(model, optimizer, 81 | opt_level=self.accelerator_fp16_opt_level, 82 | keep_batchnorm_fp32=None, # from True to None 83 | loss_scale=self.accelerator_fp16_loss_scale, 84 | max_loss_scale=1024.0, 85 | min_loss_scale=1.0) 86 | 87 | apex_model = Apex_DDP(model, delay_allreduce=True) 88 | self.ddp_model = apex_model 89 | return apex_model, optimizer 90 | 91 | def configure_sync_batchnorm(self, model: Net) -> Net: 92 | model = convert_syncbn_model(model) 93 | return model 94 | 95 | def backward_step(self, loss: torch.Tensor, optimizer: Optimizer): 96 | with amp.scale_loss(loss, optimizer) as scaled_loss: 97 | scaled_loss.backward() 98 | 99 | def optimizer_step(self, optimizer: Optimizer, model: Net, grad_norm: float) -> float: 100 | total_norm = torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 101 | grad_norm) 102 | return float(total_norm) 103 | -------------------------------------------------------------------------------- /Method/config.yaml: -------------------------------------------------------------------------------- 1 | accelerator: {CLIP_GRAD_NORM: 1.0, FP16_LOSS_SCALE: dynamic, FP16_OPT_LEVEL: O1, GRAD_ACCUMULATE_STEPS: 1, 2 | RNG_SEED: 42, SYNCBN: false} 3 | calc_image_bbox_loss: false 4 | ckpt_frequent: 5 5 | ckpt_frequent_step: 50000 6 | embed_dim: 256 7 | image_res: 224 8 | images: {batch_size: 128, caption_key: caption, image_key: binary, is_image_rpath: false, 9 | num_workers: 4, tokenized: false} 10 | 
load_bertL_by_sep: false 11 | mask_prob: 0.25 12 | mask_whole_word: true 13 | max_masks: 8 14 | max_tokens: 40 15 | max_words: 40 16 | optimizer: {lr: 0.0001, lr_mult: 2, opt: adamW, weight_decay: 0.01} 17 | patch_size: 32 18 | regions: {batch_size: 128, caption_key: caption, image_key: binary, is_image_rpath: false, 19 | iter_perc: 0.5, max_images: 48, max_regions: 5, min_perc_in_image: 0.5, num_workers: 4, 20 | tokenized: false} 21 | schedular: {epochs: 41, lr: 0.0001, num_warmup_steps: 2500, sched: linear} 22 | skipgram_prb: 0.2 23 | skipgram_size: 3 24 | temp: 0.07 25 | text_config: configs/config_bert.json 26 | text_encoder: /storage_fast/mchu/blip2/VLM/X-VLM/data/bert 27 | train_dataset_size: 5114489 28 | train_file: [hdfs://path/to/coco, hdfs://path/to/vg, hdfs://path/to/sbu, hdfs://path/to/cc3m] 29 | train_file_regions: [hdfs://path/to/coco_objs, hdfs://path/to/vg_objs, hdfs://path/to/vg_regions] 30 | use_clip_vit: false 31 | use_roberta: false 32 | use_swin: true 33 | vision_config: configs/config_swinB_224.json 34 | -------------------------------------------------------------------------------- /Method/configs/config_bert.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30522, 19 | "fusion_layer": 6, 20 | "encoder_width": 1024 21 | } 22 | -------------------------------------------------------------------------------- /Method/configs/config_clipvitB.json: -------------------------------------------------------------------------------- 1 | { 2 | "ckpt": "data/clip-vit-base-patch16.bin", 3 | "vision_width": 768, 4 | "patch_size": 16, 5 | "hidden_act": "quick_gelu", 6 | "num_attention_heads": 12, 7 | "attention_dropout": 0.0, 8 | "intermediate_size": 3072, 9 | "num_hidden_layers": 12, 10 | "local_attn_depth": 4 11 | } 12 | -------------------------------------------------------------------------------- /Method/configs/config_roberta.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "RobertaForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "bos_token_id": 0, 7 | "eos_token_id": 2, 8 | "hidden_act": "gelu", 9 | "hidden_dropout_prob": 0.1, 10 | "hidden_size": 768, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 3072, 13 | "layer_norm_eps": 1e-05, 14 | "max_position_embeddings": 514, 15 | "model_type": "roberta", 16 | "num_attention_heads": 12, 17 | "num_hidden_layers": 12, 18 | "pad_token_id": 1, 19 | "type_vocab_size": 1, 20 | "vocab_size": 50265, 21 | "fusion_layer": 6, 22 | "encoder_width": 1024 23 | } 24 | -------------------------------------------------------------------------------- /Method/configs/config_swinB_224.json: -------------------------------------------------------------------------------- 1 | { 2 | "ckpt": "/storage_fast/mchu/Multi-model/VLM/X-VLM/data/swin_base_patch4_window7_224_22k.pth", 3 | "vision_width": 1024, 4 | "image_res": 224, 5 | "window_size": 7, 6 | "embed_dim": 128, 7 | "depths": [ 2, 2, 18, 2 ], 8 | "num_heads": [ 4, 8, 16, 32 ] 9 | } 10 | 
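The config_swinB_224.json above and the 384/480 variants that follow differ only in ckpt, image_res, and window_size: Swin-B downsamples by a factor of 32 overall (4x patch embedding, then three 2x patch merges), so window_size is chosen as image_res / 32 and a single attention window tiles the final feature map exactly (224 -> 7, 384 -> 12, 480 -> 15), while vision_width = embed_dim * 2**3 = 1024. A quick sanity-check sketch of that relationship -- not part of the repo, and it assumes it is run from the Method/ directory so the configs/ paths resolve:

import json

def summarize_swin_config(path):
    cfg = json.load(open(path, 'r'))
    stride = 32                                   # 4x patch embed * 2**3 from the three merge stages
    grid = cfg['image_res'] // stride             # side length of the final token grid
    assert grid == cfg['window_size'], "window_size should tile the last stage exactly"
    print(f"{path}: {grid}x{grid} final tokens, width {cfg['vision_width']}")

summarize_swin_config('configs/config_swinB_224.json')   # 7x7 tokens, width 1024

The same stride is why the GeoText configs pair these JSONs with patch_size: 32.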
-------------------------------------------------------------------------------- /Method/configs/config_swinB_384.json: -------------------------------------------------------------------------------- 1 | { 2 | "ckpt": "/root/GeoText-1652/GeoText1652_model/swin_base_patch4_window7_224_22k.pth", 3 | "vision_width": 1024, 4 | "image_res": 384, 5 | "window_size": 12, 6 | "embed_dim": 128, 7 | "depths": [ 2, 2, 18, 2 ], 8 | "num_heads": [ 4, 8, 16, 32 ] 9 | } 10 | -------------------------------------------------------------------------------- /Method/configs/config_swinB_480.json: -------------------------------------------------------------------------------- 1 | { 2 | "ckpt": "/storage_fast/mchu/Multi-model/VLM/X-VLM/data/swin_base_patch4_window7_224_22k.pth", 3 | "vision_width": 1024, 4 | "image_res": 480, 5 | "window_size": 15, 6 | "embed_dim": 128, 7 | "depths": [ 2, 2, 18, 2 ], 8 | "num_heads": [ 4, 8, 16, 32 ] 9 | } 10 | -------------------------------------------------------------------------------- /Method/configs/re_bbox.yaml: -------------------------------------------------------------------------------- 1 | train_file: ["/root/GeoText-1652/GeoText1652_Dataset/train.json"] 2 | test_file: "/root/GeoText-1652/GeoText1652_Dataset/test_951_version.json" 3 | image_root: '/root/GeoText-1652/GeoText1652_Dataset/images' 4 | 5 | ## Vision Encoder 6 | vision_config: 'configs/config_swinB_384.json' 7 | 8 | use_clip_vit: False 9 | #image_res: 384 10 | #patch_size: 16 11 | 12 | use_swin: True 13 | image_res: 384 14 | patch_size: 32 15 | 16 | 17 | ## Text Encoder 18 | use_roberta: False 19 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 20 | text_encoder: '/root/GeoText-1652/GeoText1652_model/bert' # ['data/bert-base-uncased', 'data/roberta-base'] 21 | 22 | 23 | 24 | ## Training 25 | batch_size_train: 24 26 | batch_size_test: 1 27 | batch_size_test_text: 512 28 | max_tokens: 50 29 | embed_dim: 256 30 | temp: 0.07 31 | k_test: 256 32 | 33 | 34 | ## Other Settings 35 | # optimizer: {opt: adamW, lr: 3e-6, weight_decay: 0.001, lr_mult: 2} 36 | # schedular: {sched: linear, lr: 3e-6, epochs: 3, num_warmup_steps: 0.1} 37 | 38 | 39 | optimizer: {opt: adamW, lr: 3e-5, weight_decay: 0.01, lr_mult: 2} 40 | schedular: {sched: linear, lr: 3e-5, epochs: 1, num_warmup_steps: 0.1} 41 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/Grounding_bbox.yaml: -------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/refcoco+_vlue_test.json'] 2 | image_root: 'images/marvl/' 3 | vlue_test: True 4 | 5 | 6 | ## Vision Encoder 7 | vision_config: 'configs/config_swinB_384.json' 8 | 9 | use_clip_vit: False 10 | #image_res: 384 11 | #patch_size: 16 12 | 13 | use_swin: True 14 | image_res: 384 15 | patch_size: 32 16 | 17 | 18 | ## Text Encoder 19 | use_roberta: False 20 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 21 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 22 | 23 | 24 | ## Training 25 | batch_size: 20 26 | max_tokens: 40 27 | careful_hflip: True # first check whether 'left' or 'right' in captions 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/Grounding_weakly.yaml: 
-------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/refcoco+_bbox100_vlue_test.json'] 2 | image_root: 'images/marvl/' 3 | vlue_test: True 4 | 5 | 6 | ## Vision Encoder 7 | vision_config: 'configs/config_swinB_384.json' 8 | 9 | use_clip_vit: False 10 | #image_res: 384 11 | #patch_size: 16 12 | 13 | use_swin: True 14 | image_res: 384 15 | patch_size: 32 16 | 17 | 18 | ## Text Encoder 19 | use_roberta: False 20 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 21 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 22 | 23 | 24 | ## Training 25 | batch_size: 20 26 | block_num: 9 # i.e. the layer to calculate cross-attn; adjust it to get best performance 27 | max_tokens: 40 28 | embed_dim: 256 29 | temp: 0.07 30 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/NLVR.yaml: -------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/nlvr2_vlue_test.json'] 2 | image_root: 'images/marvl/' 3 | 4 | ## Vision Encoder 5 | vision_config: 'configs/config_swinB_384.json' 6 | 7 | use_clip_vit: False 8 | #image_res: 384 9 | #patch_size: 16 10 | 11 | use_swin: True 12 | image_res: 384 13 | patch_size: 32 14 | 15 | 16 | ## Text Encoder 17 | use_roberta: False 18 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 19 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 20 | 21 | 22 | ## Training 23 | batch_size: 20 # 1 24 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/Retrieval.yaml: -------------------------------------------------------------------------------- 1 | test_file: 'data/vlue_released/itr_vlue_test.json' 2 | image_root: 'images/marvl/' 3 | 4 | ## Vision Encoder 5 | vision_config: 'configs/config_swinB_384.json' 6 | 7 | use_clip_vit: False 8 | #image_res: 384 9 | #patch_size: 16 10 | 11 | use_swin: True 12 | image_res: 384 13 | patch_size: 32 14 | 15 | 16 | ## Text Encoder 17 | use_roberta: False 18 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 19 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 20 | 21 | 22 | ## Training 23 | batch_size_test: 64 # 1 24 | batch_size_test_text: 64 # 1 25 | max_tokens: 40 26 | embed_dim: 256 27 | temp: 0.07 28 | k_test: 256 29 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/VQA.yaml: -------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/vqa_vlue_test.json'] 2 | vqa_root: 'images/marvl/' 3 | vg_root: 'images/visualgenome/' 4 | answer_list: 'data/finetune/answer_list.json' 5 | 6 | 7 | ## Vision Encoder 8 | vision_config: 'configs/config_swinB_384.json' 9 | 10 | use_clip_vit: False 11 | #image_res: 384 12 | #patch_size: 16 13 | 14 | use_swin: True 15 | image_res: 384 16 | patch_size: 32 17 | 18 | ## Text Encoder 19 | use_roberta: False 20 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 21 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 22 | 23 | 24 | ## Training 25 | num_dec_layers: 6 26 | batch_size_test: 32 27 | 
max_tokens: 40 28 | k_test: 128 29 | 30 | 31 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/VQA_480.yaml: -------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/vqa_vlue_test.json'] 2 | vqa_root: 'images/marvl/' 3 | vg_root: 'images/visualgenome/' 4 | answer_list: 'data/finetune/answer_list.json' 5 | 6 | ## Vision Encoder 7 | vision_config: 'configs/config_swinB_480.json' 8 | 9 | use_clip_vit: False 10 | #image_res: 480 11 | #patch_size: 16 12 | 13 | use_swin: True 14 | image_res: 480 15 | patch_size: 32 16 | 17 | ## Text Encoder 18 | use_roberta: False 19 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 20 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 21 | 22 | 23 | ## Training 24 | num_dec_layers: 6 25 | batch_size_test: 32 26 | max_tokens: 40 27 | k_test: 128 28 | 29 | -------------------------------------------------------------------------------- /Method/dataset/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/coco_karpathy_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/coco_karpathy_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/coco_karpathy_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/coco_karpathy_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/dist_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/dist_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/dist_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/dist_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/grounding_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/grounding_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/grounding_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/grounding_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/nlvr_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/nlvr_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/nlvr_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/nlvr_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/pretrain_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/pretrain_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/pretrain_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/pretrain_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/randaugment.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/randaugment.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/randaugment.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/randaugment.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/re_bbox_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/re_bbox_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/re_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/re_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/re_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/re_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/vqa_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/vqa_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/vqa_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/vqa_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/coco_karpathy_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import random 4 | from collections import Counter 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.datasets.utils import download_url 9 | 10 | from PIL import Image 11 | 12 | from dataset.utils import pre_caption 13 | 14 | 15 | class coco_karpathy_train(Dataset): 16 | def __init__(self, transform, image_root, ann_rpath, max_words=30, prompt=''): 17 | self.annotation = [] 18 | for f in ann_rpath: 19 | self.annotation += json.load(open(f, 'r')) 20 | 21 | self.transform = transform 22 | self.image_root = image_root 23 | self.max_words = max_words 24 | self.prompt = prompt 25 | 26 | self.img_ids = {} 27 | n = 0 28 | for ann in self.annotation: 29 | img_id = ann['image_id'] 30 | if img_id not in self.img_ids.keys(): 31 | self.img_ids[img_id] = n 32 | n += 1 33 | 34 | def __len__(self): 35 | return len(self.annotation) 36 | 37 | def __getitem__(self, index): 38 | 39 | ann = self.annotation[index] 40 | 41 | image_path = os.path.join(self.image_root, ann['image']) 42 | image = Image.open(image_path).convert('RGB') 43 | image = self.transform(image) 44 | 45 | caption = self.prompt + pre_caption(ann['caption'], self.max_words) 46 | 47 | return image, caption, self.img_ids[ann['image_id']] 48 | 49 | 50 | class coco_karpathy_train_scst(Dataset): 51 | def __init__(self, 
transform, image_root, ann_rpath, max_words=30, prompt=''): 52 | self.annotation = [] 53 | self.image_captions_map = {} 54 | 55 | for f in ann_rpath: 56 | for ann in json.load(open(f, 'r')): 57 | self.annotation.append(ann) 58 | 59 | if ann['image'] in self.image_captions_map.keys(): 60 | self.image_captions_map[ann['image']].append(ann['caption']) 61 | else: 62 | self.image_captions_map[ann['image']] = [ann['caption']] 63 | 64 | counter = Counter() 65 | for _, v in self.image_captions_map.items(): 66 | counter[len(v)] += 1 67 | print("### image_captions_map, ", counter, flush=True) 68 | 69 | self.transform = transform 70 | self.image_root = image_root 71 | self.max_words = max_words 72 | self.prompt = prompt 73 | 74 | self.img_ids = {} 75 | n = 0 76 | for ann in self.annotation: 77 | img_id = ann['image_id'] 78 | if img_id not in self.img_ids.keys(): 79 | self.img_ids[img_id] = n 80 | n += 1 81 | 82 | def __len__(self): 83 | return len(self.annotation) 84 | 85 | def __getitem__(self, index): 86 | ann = self.annotation[index] 87 | 88 | image_path = os.path.join(self.image_root, ann['image']) 89 | image = Image.open(image_path).convert('RGB') 90 | image = self.transform(image) 91 | 92 | # w/o prompt 93 | captions_gt = [pre_caption(c, self.max_words) for c in self.image_captions_map[ann['image']]] 94 | 95 | return image, random.sample(captions_gt, 5) 96 | 97 | def collate_fn(self, batch_sample): 98 | batch = [] 99 | for x in zip(*batch_sample): 100 | batch.append(x) 101 | 102 | image_list, captions_gt_list = batch 103 | 104 | images = torch.stack(image_list) 105 | 106 | return images, captions_gt_list 107 | 108 | 109 | class coco_karpathy_caption_eval(Dataset): 110 | def __init__(self, transform, image_root, ann_rpath, split): 111 | self.annotation = json.load(open(ann_rpath, 'r')) 112 | self.transform = transform 113 | self.image_root = image_root 114 | 115 | def __len__(self): 116 | return len(self.annotation) 117 | 118 | def __getitem__(self, index): 119 | 120 | ann = self.annotation[index] 121 | 122 | image_path = os.path.join(self.image_root, ann['image']) 123 | image = Image.open(image_path).convert('RGB') 124 | image = self.transform(image) 125 | 126 | img_id = ann['image'].split('/')[-1].strip('.jpg').split('_')[-1] 127 | 128 | return image, int(img_id) 129 | 130 | -------------------------------------------------------------------------------- /Method/dataset/dist_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276) 4 | # Github: https://github.com/zengyan-97/X-VLM 5 | # Copyright (c) 2022, ByteDance Inc. 6 | # All rights reserved. 7 | 8 | import sys 9 | from typing import List, Any 10 | import warnings 11 | import random 12 | from itertools import cycle 13 | import torch 14 | from torch.utils.data import IterableDataset 15 | 16 | from utils.hdfs_io import hopen, hlist_files 17 | 18 | 19 | class DistLineReadingDataset(IterableDataset): # pylint: disable=W0223 20 | """ 21 | iterate a set of folders. 
22 | """ 23 | def __init__(self, 24 | data_path: str, 25 | rank: int = 0, 26 | world_size: int = 1, 27 | shuffle: bool = False, 28 | repeat: bool = False): 29 | super().__init__() 30 | self.shuffle = shuffle 31 | self.rank = rank 32 | self.world_size = world_size 33 | 34 | self.files = hlist_files(data_path.split(',')) 35 | self.files = [f for f in self.files if f.find('_SUCCESS') < 0] 36 | self.is_hdfs = data_path.startswith('hdfs') 37 | 38 | self.repeat = repeat 39 | print('[DATA]--all dataset containing {} files.'.format(len(self.files))) 40 | if len(self.files) % self.world_size != 0: 41 | print('[DATA]--Whole dataset file num %s cannot split to worldsize %s ' % 42 | (len(self.files), self.world_size)) 43 | sys.stdout.flush() 44 | 45 | def generate(self): 46 | if self.world_size == 1 or len(self.files) == 1: 47 | cur_dataloader_files = self.files 48 | else: 49 | cur_dataloader_files = split_shard( 50 | self.files, self.rank, self.world_size) 51 | 52 | while True: 53 | if self.shuffle: 54 | random.shuffle(cur_dataloader_files) 55 | worker_info = torch.utils.data.get_worker_info() 56 | 57 | if worker_info is not None: 58 | if len(cur_dataloader_files) % worker_info.num_workers != 0: 59 | print('[DATA]--current dataloader %s file num %s cannot split to worker_num %s ' % 60 | (self.rank, len(cur_dataloader_files), worker_info.num_workers)) 61 | cur_worker_files = split_shard( 62 | cur_dataloader_files, worker_info.id, worker_info.num_workers) 63 | if worker_info.id == 0: 64 | print("[DataLoader] --> Rank:{} Workers:[{} ~ {}][{}] Size of process file:{} ...".format( 65 | self.rank, 0, worker_info.num_workers - 1, worker_info.id, len(cur_dataloader_files))) 66 | else: 67 | cur_worker_files = cur_dataloader_files 68 | 69 | if self.shuffle: 70 | random.shuffle(cur_worker_files) 71 | for filepath in cur_worker_files: 72 | if self.is_hdfs: 73 | with hopen(filepath, 'r') as reader: 74 | for line in reader: 75 | yield line.decode() 76 | continue 77 | with open(filepath, 'r') as reader: 78 | for line in reader: 79 | yield line 80 | 81 | if not self.repeat: 82 | break 83 | 84 | def __iter__(self): 85 | return self.generate() 86 | 87 | 88 | def split_shard(data: List[Any], shard_idx: int, shard_size: int): 89 | num = len(data) 90 | if num < shard_size: 91 | raise RuntimeError("num:{} < shard size:{}".format(num, shard_size)) 92 | start_idx = (num * shard_idx) // shard_size 93 | end_idx = (num * (shard_idx + 1)) // shard_size 94 | return data[start_idx: end_idx] 95 | -------------------------------------------------------------------------------- /Method/dataset/re_bbox_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import math 4 | import random 5 | from random import random as rand 6 | import torchvision.transforms as transforms 7 | import torch 8 | 9 | from torchvision.transforms.functional import hflip, resize 10 | 11 | from dataset.utils import pre_caption 12 | 13 | 14 | from torch.utils.data import Dataset 15 | 16 | from PIL import Image 17 | from PIL import ImageFile 18 | 19 | ImageFile.LOAD_TRUNCATED_IMAGES = True 20 | Image.MAX_IMAGE_PIXELS = None 21 | 22 | 23 | 24 | class re_dataset_bbox(Dataset): 25 | def __init__(self, ann_file, transform, image_root, max_words=30, mode='train', config=None): 26 | self.image_res = config['image_res'] 27 | 28 | self.ann = [] 29 | for f in ann_file: 30 | self.ann += json.load(open(f, 'r')) 31 | self.transform = transform 32 | self.image_root = image_root 33 | self.max_words = 
max_words 34 | self.mode = mode 35 | self.img_ids = {} 36 | 37 | n = 0 38 | for ann in self.ann: 39 | img_id = ann['image_id'] 40 | if img_id not in self.img_ids.keys(): 41 | self.img_ids[img_id] = n 42 | n += 1 43 | 44 | def __len__(self): 45 | return len(self.ann) 46 | 47 | def __getitem__(self, index): 48 | # print('Note: This part is in the dataset building process') 49 | 50 | ann = self.ann[index] 51 | caption = pre_caption(ann['caption'], self.max_words) 52 | # print("Here is the caption",caption) 53 | image_path = os.path.join(self.image_root, ann['image']) 54 | image = Image.open(image_path).convert('RGB') 55 | # print("Here is the original image", image) 56 | W, H = image.size 57 | 58 | # random crop 59 | target_bboxes = [] 60 | sens = [] 61 | for sen in ann["sentences"]: 62 | if sen is None: 63 | sen = 'NONE' 64 | else: 65 | sen = pre_caption(sen, self.max_words) 66 | sens.append(sen) 67 | # print("Here are the sens,",sens) 68 | no_bbox_value = -100 69 | no_bbox_tensor = [no_bbox_value, no_bbox_value, no_bbox_value, no_bbox_value] 70 | 71 | for box in ann["bboxes"]: 72 | if box is None: 73 | target_bboxes.append(no_bbox_tensor) 74 | else: 75 | 76 | target_bboxes.append(box) 77 | 78 | image = resize(image, [self.image_res, self.image_res], interpolation=Image.BICUBIC) 79 | image = self.transform(image) 80 | 81 | target_bboxes = torch.tensor(target_bboxes, dtype=torch.float32) 82 | 83 | return image, caption, self.img_ids[ann['image_id']], sens, target_bboxes 84 | 85 | 86 | 87 | 88 | class re_eval_dataset(Dataset): 89 | def __init__(self, ann_file, transform, image_root, max_words=50): 90 | self.ann = json.load(open(ann_file, 'r')) 91 | self.transform = transform 92 | self.image_root = image_root 93 | self.max_words = max_words 94 | 95 | self.text = [] 96 | self.image = [] 97 | self.txt2img = {} 98 | self.img2txt = {} 99 | self.img2building = {} 100 | 101 | txt_id = 0 102 | building_id = 0 103 | ann_building = 0 104 | for img_id, ann in enumerate(self.ann): 105 | ann["building_id"] = ann["image_id"][:4] 106 | if ann_building == 0: 107 | ann_building = ann["building_id"] 108 | self.image.append(ann['image']) 109 | self.img2txt[img_id] = [] 110 | self.img2building[img_id] = building_id 111 | if ann_building != ann["building_id"]: 112 | ann_building = ann["building_id"] 113 | building_id += 1 114 | for i, caption in enumerate(ann['caption']): 115 | self.text.append(pre_caption(caption, self.max_words)) 116 | self.img2txt[img_id].append(txt_id) 117 | self.txt2img[txt_id] = img_id 118 | txt_id += 1 119 | 120 | def __len__(self): 121 | return len(self.image) 122 | 123 | def __getitem__(self, index): 124 | 125 | image_path = os.path.join(self.image_root, self.ann[index]['image']) 126 | image = Image.open(image_path).convert('RGB') 127 | image = self.transform(image) 128 | 129 | return image, index 130 | -------------------------------------------------------------------------------- /Method/models/__init__.py: -------------------------------------------------------------------------------- 1 | from models.xvlm import XVLMBase 2 | from models.xvlm import build_mlp 3 | from models.xvlm import load_pretrained -------------------------------------------------------------------------------- /Method/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/__init__.cpython-37.pyc 
-------------------------------------------------------------------------------- /Method/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/box_ops.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/box_ops.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/box_ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/box_ops.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/clip_vit.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/clip_vit.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/clip_vit.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/clip_vit.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_bbox.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_bbox.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_pretrain.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_pretrain.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_re_bbox.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_re_bbox.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_retrieval.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_retrieval.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_retrieval.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_retrieval.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_vqa.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_vqa.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/swin_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/swin_transformer.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/swin_transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/swin_transformer.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/tokenization_bert.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/tokenization_bert.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/tokenization_bert.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/tokenization_bert.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/tokenization_roberta.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/tokenization_roberta.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/tokenization_roberta.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/tokenization_roberta.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/vit.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/vit.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/vit.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/vit.cpython-38.pyc 
-------------------------------------------------------------------------------- /Method/models/__pycache__/xbert.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xbert.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xbert.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xbert.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xroberta.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xroberta.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xroberta.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xroberta.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xvlm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xvlm.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xvlm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xvlm.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Utilities for bounding box manipulation and GIoU. 
4 | """
5 | import math
6 | import torch
7 | from torchvision.ops.boxes import box_area
8 | 
9 | def box_cxcywh_to_xyxy(x):  # this one is used
10 |     x_c, y_c, w, h = x.unbind(-1)
11 |     b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
12 |          (x_c + 0.5 * w), (y_c + 0.5 * h)]
13 |     return torch.stack(b, dim=-1)
14 | 
15 | 
16 | def box_xyxy_to_cxcywh(x):
17 |     x0, y0, x1, y1 = x.unbind(-1)
18 |     b = [(x0 + x1) / 2, (y0 + y1) / 2,
19 |          (x1 - x0), (y1 - y0)]
20 |     return torch.stack(b, dim=-1)
21 | 
22 | 
23 | # modified from torchvision to also return the union
24 | def box_iou(boxes1, boxes2):
25 |     area1 = box_area(boxes1)
26 |     area2 = box_area(boxes2)
27 | 
28 |     lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
29 |     rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]
30 | 
31 |     wh = (rb - lt).clamp(min=0)  # [N,M,2]
32 |     inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
33 | 
34 |     union = area1[:, None] + area2 - inter
35 | 
36 |     iou = inter / union
37 |     return iou, union
38 | 
39 | 
40 | def generalized_box_iou(boxes1, boxes2):
41 |     """
42 |     Generalized IoU from https://giou.stanford.edu/
43 | 
44 |     The boxes should be in [x0, y0, x1, y1] format
45 | 
46 |     Returns a [N, M] pairwise matrix, where N = len(boxes1)
47 |     and M = len(boxes2)
48 |     """
49 |     iou, union = box_iou(boxes1, boxes2)
50 | 
51 |     lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
52 |     rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
53 | 
54 |     wh = (rb - lt).clamp(min=0)  # [N,M,2]
55 |     area = wh[:, :, 0] * wh[:, :, 1]
56 | 
57 |     return iou - (area - union) / area
58 | 
59 | 
60 | def bbox_iou(box1, box2, x1y1x2y2=True):
61 |     if not x1y1x2y2:
62 |         box1 = torch.cat((box1[..., :2] - box1[..., 2:] / 2, box1[..., :2] + box1[..., 2:] / 2), dim=-1)
63 |         box2 = torch.cat((box2[..., :2] - box2[..., 2:] / 2, box2[..., :2] + box2[..., 2:] / 2), dim=-1)
64 | 
65 |     inter = torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])  # pairwise overlap extents
66 |     inter = torch.clamp(inter, min=0)
67 |     inter_area = inter[..., 0] * inter[..., 1]
68 | 
69 |     area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
70 |     area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
71 |     union_area = area1[:, None] + area2 - inter_area
72 | 
73 |     return inter_area / union_area
74 | 
75 | def bbox_giou(box1, box2, x1y1x2y2=True):
76 |     if not x1y1x2y2:
77 |         box1 = torch.cat((box1[..., :2] - box1[..., 2:] / 2, box1[..., :2] + box1[..., 2:] / 2), dim=-1)
78 |         box2 = torch.cat((box2[..., :2] - box2[..., 2:] / 2, box2[..., :2] + box2[..., 2:] / 2), dim=-1)
79 | 
80 |     # GIoU = IoU - (C - U) / C, with C the area of the smallest enclosing box
81 |     iou, union = box_iou(box1, box2)
82 |     c = (torch.max(box1[:, None, 2:], box2[:, 2:]) - torch.min(box1[:, None, :2], box2[:, :2])).clamp(min=0)
83 |     c_area = c[..., 0] * c[..., 1]
84 | 
85 |     return iou - (c_area - union) / c_area
86 | 
87 | def bbox_diou(box1, box2, x1y1x2y2=True):
88 |     iou = bbox_iou(box1, box2, x1y1x2y2)
89 | 
90 |     if not x1y1x2y2:
91 |         box1 = torch.cat((box1[..., :2] - box1[..., 2:] / 2, box1[..., :2] + box1[..., 2:] / 2), dim=-1)
92 |         box2 = torch.cat((box2[..., :2] - box2[..., 2:] / 2, box2[..., :2] + box2[..., 2:] / 2), dim=-1)
93 | 
94 |     center1 = (box1[:, :2] + box1[:, 2:]) / 2
95 |     center2 = (box2[:, :2] + box2[:, 2:]) / 2
96 |     inter_diag = torch.sum((center2[None, :, :] - center1[:, None, :]) ** 2, dim=-1)  # pairwise squared center distance, [N, M]
97 | 
98 |     c = torch.max(box1[:, None, 2:], box2[:, 2:]) - torch.min(box1[:, None, :2], box2[:, :2])
99 |     c_diag = torch.sum(c ** 2, dim=-1)
100 | 
101 |     return iou - inter_diag / c_diag
102 | 
103 | 
104 | def bbox_ciou(box1, box2, x1y1x2y2=True):
105 |     # first compute the DIoU
106 |     diou = bbox_diou(box1, box2, x1y1x2y2)
107 | 
108 |     if x1y1x2y2:
109 |         # convert to [cx, cy, w, h] format
110 |         box1 = torch.cat(((box1[..., 2:] + box1[..., :2]) / 2, box1[..., 2:] - box1[..., :2]), dim=-1)
111 |         box2 = torch.cat(((box2[..., 2:] + box2[..., :2]) / 2, box2[..., 2:] - box2[..., :2]), dim=-1)
112 | 
113 |     w1, h1 = box1[:, 2], box1[:, 3]
114 |     w2, h2 = box2[:, 2], box2[:, 3]
115 | 
116 |     # consistency of the aspect ratios, computed pairwise -> [N, M]
117 |     v = (4 / (math.pi ** 2)) * torch.pow(torch.atan(w1 / h1)[:, None] - torch.atan(w2 / h2)[None, :], 2)
118 | 
119 |     # alpha trade-off term; avoids over-penalizing the aspect-ratio term when the overlap is zero
120 |     with torch.no_grad():
121 |         alpha = v / (1 - diou + v)
122 | 
123 |     # the final CIoU combines DIoU with the aspect-ratio consistency term
124 |     ciou = diou - (alpha * v)
125 |     return ciou
--------------------------------------------------------------------------------
/Method/optim.py:
--------------------------------------------------------------------------------
1 | from transformers.optimization import AdamW
2 | 
3 | 
4 | def create_optimizer(args, model):
5 |     lr = args.lr
6 |     wd = args.weight_decay
7 |     lr_mult = getattr(args, 'lr_mult', 1)
8 |     print("### lr_mult, ", lr_mult)
9 | 
10 |     optimizer_grouped_parameters = [
11 |         {"params": [], "weight_decay": wd, "lr": lr},
12 |         {"params": [], "weight_decay": 0.0, "lr": lr},
13 |         {"params": [], "weight_decay": wd, "lr": lr * lr_mult},
14 |         {"params": [], "weight_decay": 0.0, "lr": lr * lr_mult}
15 |     ]
16 | 
17 |     no_decay = {"bias",
18 |                 "LayerNorm.bias",
19 |                 "LayerNorm.weight",
20 |                 "norm.bias",
21 |                 "norm.weight",
22 |                 "norm1.bias",
23 |                 "norm1.weight",
24 |                 "norm2.bias",
25 |                 "norm2.weight"}
26 | 
27 |     if hasattr(model, 'init_params'):
28 |         large_lr = model.init_params
29 |         print("### model has 'init_params', ", len(large_lr))
30 |     else:
31 |         large_lr = {}
32 | 
33 |     for n, p in model.named_parameters():
34 |         if not p.requires_grad:
35 |             continue  # frozen weights
36 | 
37 |         if any(nd in n for nd in no_decay):
38 |             if n in large_lr:
39 |                 optimizer_grouped_parameters[3]['params'].append(p)
40 |             else:
41 |                 optimizer_grouped_parameters[1]['params'].append(p)
42 |         else:  # decay
43 |             if n in large_lr:
44 |                 optimizer_grouped_parameters[2]['params'].append(p)
45 |             else:
46 |                 optimizer_grouped_parameters[0]['params'].append(p)
47 | 
48 |     optimizer = AdamW(optimizer_grouped_parameters, lr=lr, eps=1e-8, betas=(0.9, 0.98))
49 | 
50 |     return optimizer
51 | 
--------------------------------------------------------------------------------
/Method/output/all_output_eva/config.yaml:
--------------------------------------------------------------------------------
1 | train_file: [/root/GeoText-1652/GeoText1652_Dataset/train.json]
2 | test_file: /root/GeoText-1652/GeoText1652_Dataset/test_951_version.json
3 | image_root: /root/GeoText-1652/GeoText1652_Dataset/images
4 | 
5 | ## Vision Encoder
6 | vision_config: configs/config_swinB_384.json
7 | 
8 | use_clip_vit: false
9 | #image_res: 384
10 | #patch_size: 16
11 | 
12 | use_swin: true
13 | image_res: 384
14 | patch_size: 32
15 | 
16 | 
17 | ## Text Encoder
18 | use_roberta: false
19 | text_config: configs/config_bert.json # ['configs/config_bert.json', 'configs/config_roberta.json']
20 | text_encoder: /root/GeoText-1652/GeoText1652_model/bert # ['data/bert-base-uncased', 'data/roberta-base']
21 | 
22 | 
23 | 
24 | ## Training
25 | batch_size_train: 24
26 | batch_size_test: 1
27 | batch_size_test_text: 512
28 | max_tokens: 50
29 | embed_dim: 256
30 | temp: 0.07
31 | k_test: 256
32 | 
33 | 
34 | ## Other Settings
35 | # optimizer: {opt: adamW, lr: 3e-6, weight_decay: 0.001, lr_mult: 2}
36 | # schedular: {sched: linear, lr: 3e-6, epochs: 3, num_warmup_steps: 0.1}
37 | 
38 | 
39 | optimizer: {opt: adamW, lr: 3e-5,
weight_decay: 0.01, lr_mult: 2} 40 | schedular: {sched: linear, lr: 3e-5, epochs: 1, num_warmup_steps: 0.1} 41 | -------------------------------------------------------------------------------- /Method/requirements.txt: -------------------------------------------------------------------------------- 1 | torch -f https://download.pytorch.org/whl/cu118 2 | torchvision -f https://download.pytorch.org/whl/cu118 3 | torchaudio -f https://download.pytorch.org/whl/cu118 4 | 5 | 6 | 7 | 8 | timm==0.4.9 9 | transformers==4.12.5 10 | ruamel_yaml 11 | opencv-python-headless 12 | scikit-image 13 | matplotlib 14 | chardet 15 | charset_normalizer 16 | PyOpenGL 17 | 18 | 19 | 20 | pycocotools 21 | pycocoevalcap 22 | 23 | -------------------------------------------------------------------------------- /Method/scheduler.py: -------------------------------------------------------------------------------- 1 | from torch.optim.lr_scheduler import LambdaLR 2 | 3 | 4 | def create_scheduler(args, optimizer): 5 | if 'num_training_steps' not in args: 6 | args['num_training_steps'] = args['epochs'] * args['step_per_epoch'] 7 | print("### num_training_steps, ", args['num_training_steps'], flush=True) 8 | 9 | if isinstance(args['num_warmup_steps'], float): 10 | assert 0 <= args['num_warmup_steps'] < 1 11 | args['num_warmup_steps'] = int(args['num_training_steps'] * args['num_warmup_steps']) 12 | print("### num_warmup_steps, ", args['num_warmup_steps'], flush=True) 13 | 14 | if args.sched == 'linear': 15 | def lr_lambda(current_step: int): 16 | if current_step < args.num_warmup_steps: 17 | return float(current_step) / float(max(1, args.num_warmup_steps)) 18 | return max( 19 | 0.0, float(args.num_training_steps - current_step) / float( 20 | max(1, args.num_training_steps - args.num_warmup_steps)) 21 | ) 22 | 23 | lr_scheduler = LambdaLR(optimizer, lr_lambda, last_epoch=-1) 24 | 25 | else: 26 | raise NotImplementedError(f"args.sched == {args.sched}") 27 | 28 | return lr_scheduler 29 | -------------------------------------------------------------------------------- /Method/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/checkpointer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/checkpointer.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/checkpointer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/checkpointer.cpython-38.pyc -------------------------------------------------------------------------------- 
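optim.py (shown earlier) groups parameters four ways -- weight-decay vs. no-decay, each at the base lr or at lr * lr_mult for freshly initialized init_params -- and scheduler.py (just above) turns a fractional num_warmup_steps into an absolute step count before building a linear warmup/decay LambdaLR. A hypothetical wiring of the two, not from the repo: the AttrDict shim, the toy model, and all numbers are illustrative; it assumes it is run from Method/ so optim and scheduler import, and in real training step_per_epoch comes from the dataloader length.

import torch

from optim import create_optimizer
from scheduler import create_scheduler

class AttrDict(dict):
    # both modules read the config as args.lr and as args['lr']
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

model = torch.nn.Linear(8, 2)   # stand-in for the real XVLMBase model
opt_args = AttrDict(lr=3e-5, weight_decay=0.01, lr_mult=2)
sched_args = AttrDict(sched='linear', epochs=1, num_warmup_steps=0.1, step_per_epoch=1000)

optimizer = create_optimizer(opt_args, model)           # 4 param groups: decay/no-decay x lr/lr*lr_mult
lr_scheduler = create_scheduler(sched_args, optimizer)  # warmup becomes int(1000 * 1 * 0.1) = 100 steps

for _ in range(3):                                      # one scheduler step per optimizer step
    optimizer.step()
    lr_scheduler.step()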
/Method/utils/__pycache__/hdfs_io.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/hdfs_io.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/hdfs_io.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/hdfs_io.cpython-38.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/torch_io.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/torch_io.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/torch_io.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/torch_io.cpython-38.pyc -------------------------------------------------------------------------------- /Method/utils/checkpointer.py: -------------------------------------------------------------------------------- 1 | # Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276) 2 | # Github: https://github.com/zengyan-97/X-VLM 3 | # Copyright (c) 2022, ByteDance Inc. 4 | # All rights reserved. 
5 | 6 | from typing import Union, Dict, List, Tuple, Any, Callable 7 | import logging 8 | import os 9 | import re 10 | import time 11 | 12 | import torch 13 | 14 | from utils.hdfs_io import hexists, hmkdir, hcopy 15 | from utils.torch_io import save as hdfs_torch_save 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class Checkpointer: 20 | def __init__(self, 21 | serialization_dir: str = ".output") -> None: 22 | self._serialization_dir = serialization_dir 23 | if not hexists(self._serialization_dir): 24 | hmkdir(self._serialization_dir) 25 | 26 | def save_checkpoint(self, 27 | epoch: Union[int, str], 28 | model_state: Dict[str, Any], 29 | training_states: Dict[str, Any], 30 | step: int = -1) -> None: 31 | """ 32 | Save ckpt to local or HDFS 33 | """ 34 | if step > 0: 35 | model_path = os.path.join( 36 | self._serialization_dir, "model_state_step_{}.th".format(step)) 37 | hdfs_torch_save(model_state, model_path) 38 | 39 | else: 40 | model_path = os.path.join( 41 | self._serialization_dir, "model_state_epoch_{}.th".format(epoch)) 42 | 43 | training_path = os.path.join(self._serialization_dir, 44 | "training_state_latest.th") 45 | hdfs_torch_save(model_state, model_path) 46 | hdfs_torch_save({**training_states, "epoch": epoch}, training_path) 47 | -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/cider/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/cider/cider.py: -------------------------------------------------------------------------------- 1 | # Filename: cider.py 2 | # 3 | # 4 | # Description: Describes the class to compute the CIDEr 5 | # (Consensus-Based Image Description Evaluation) Metric 6 | # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726) 7 | # 8 | # Creation Date: Sun Feb 8 14:16:54 2015 9 | # 10 | # Authors: Ramakrishna Vedantam and 11 | # Tsung-Yi Lin 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | from .cider_scorer import CiderScorer 17 | 18 | 19 | class Cider: 20 | """ 21 | Main Class to compute the CIDEr metric 22 | 23 | """ 24 | def __init__(self, n=4, df="corpus"): 25 | """ 26 | Initialize the CIDEr scoring function 27 | : param n (int): n-gram size 28 | : param df (string): 
/Method/utils/cider/pyciderevalcap/__init__.py:
--------------------------------------------------------------------------------
__author__ = 'tylin'
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/cider/__init__.py:
--------------------------------------------------------------------------------
__author__ = 'tylin'
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/cider/cider.py:
--------------------------------------------------------------------------------
# Filename: cider.py
#
# Description: Defines the class that computes the CIDEr
#              (Consensus-Based Image Description Evaluation) metric
#              by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
#
# Creation Date: Sun Feb 8 14:16:54 2015
#
# Authors: Ramakrishna Vedantam and
#          Tsung-Yi Lin
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from .cider_scorer import CiderScorer


class Cider:
    """
    Main class to compute the CIDEr metric.
    """
    def __init__(self, n=4, df="corpus"):
        """
        Initialize the CIDEr scoring function.

        :param n (int): maximum n-gram size; scores are summed over 1- to n-grams
        :param df (str): where the IDF values come from; one of 'corpus', 'coco-train'
        :return: None
        """
        self._n = n
        self._df = df
        self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)

    def compute_score(self, gts, res):
        """
        Compute the CIDEr score for a set of candidate captions.

        :param gts (dict): maps image_id to a list of tokenized reference sentences
        :param res (list): dicts with keys 'image_id' and 'caption', where
                           'caption' is a single-element list holding the
                           tokenized candidate sentence
        :return: (score, scores): the corpus-level CIDEr score (float) and the
                 per-image scores
        """
        # clear all previous hypotheses and references
        self.cider_scorer.clear()

        for res_id in res:
            hypo = res_id['caption']
            ref = gts[res_id['image_id']]

            # sanity check: exactly one candidate, at least one reference
            assert(type(hypo) is list)
            assert(len(hypo) == 1)
            assert(type(ref) is list)
            assert(len(ref) > 0)
            self.cider_scorer += (hypo[0], ref)

        (score, scores) = self.cider_scorer.compute_score()

        return score, scores

    def method(self):
        return "CIDEr"
--------------------------------------------------------------------------------
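A short, hedged example of driving Cider (the import path and the tokenized captions are made up for illustration):

    from pyciderevalcap.cider.cider import Cider

    # references: image_id -> list of tokenized reference sentences
    gts = {"img_1": ["a man rides a red bike", "a person on a bicycle"]}
    # candidates: one single-element caption list per image
    res = [{"image_id": "img_1", "caption": ["a man riding a red bike"]}]

    scorer = Cider(n=4, df="corpus")
    score, scores = scorer.compute_score(gts, res)
    print(scorer.method(), score)   # corpus-level CIDEr plus per-image scores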
/Method/utils/cider/pyciderevalcap/ciderD/__init__.py:
--------------------------------------------------------------------------------
__author__ = 'tylin'
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD.cpython-37.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD.cpython-38.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD_scorer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD_scorer.cpython-37.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD_scorer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD_scorer.cpython-38.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/ciderD.py:
--------------------------------------------------------------------------------
# Filename: ciderD.py
#
# Description: Defines the class that computes the CIDEr-D
#              (Consensus-Based Image Description Evaluation) metric
#              by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
#
# Creation Date: Sun Feb 8 14:16:54 2015
#
# Authors: Ramakrishna Vedantam and Tsung-Yi Lin
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from .ciderD_scorer import CiderScorer


class CiderD:
    """
    Main class to compute the CIDEr-D metric.
    """
    def __init__(self, n=4, sigma=6.0, df="corpus"):
        # sum over 1- to n-grams
        self._n = n
        # standard deviation parameter for the gaussian length penalty
        self._sigma = sigma
        # where to compute document frequencies from
        self._df = df
        self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)

    def compute_score(self, gts, res):
        """
        Compute the CIDEr-D score for a set of candidate captions.

        :param gts (dict): maps image_id to a list of tokenized reference sentences
        :param res (list): dicts with keys 'image_id' and 'caption', where
                           'caption' is a single-element list holding the
                           tokenized candidate sentence
        :return: (score, scores): the corpus-level CIDEr-D score (float) and the
                 per-image scores
        """
        # score into a fresh scorer so repeated calls do not accumulate state
        tmp_cider_scorer = self.cider_scorer.copy_empty()
        tmp_cider_scorer.clear()

        for res_id in res:
            hypo = res_id['caption']
            ref = gts[res_id['image_id']]

            # sanity check: exactly one candidate, at least one reference
            assert(type(hypo) is list)
            assert(len(hypo) == 1)
            assert(type(ref) is list)
            assert(len(ref) > 0)
            tmp_cider_scorer += (hypo[0], ref)

        (score, scores) = tmp_cider_scorer.compute_score()

        return score, scores

    def method(self):
        return "CIDEr-D"
--------------------------------------------------------------------------------
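Unlike Cider, which scores through the shared self.cider_scorer, CiderD.compute_score works on a fresh copy_empty() scorer, so one instance can be reused across evaluations without state leaking between calls. A sketch, reusing the made-up gts/res from the previous example:

    from pyciderevalcap.ciderD.ciderD import CiderD

    scorer_d = CiderD(n=4, sigma=6.0, df="corpus")
    score_a, _ = scorer_d.compute_score(gts, res)
    score_b, _ = scorer_d.compute_score(gts, res)   # same inputs, same result
    assert score_a == score_b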
/Method/utils/hdfs_io.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276)
# Github: https://github.com/zengyan-97/X-VLM
# Copyright (c) 2022, ByteDance Inc.
# All rights reserved.

import sys
from typing import IO, Any, List

import shutil
import subprocess
from contextlib import contextmanager
import os

# Point this at your local hadoop installation.
HADOOP_BIN = 'HADOOP_ROOT_LOGGER=ERROR,console /SET/PATH/TO/hadoop/bin/hdfs'

__all__ = ['hlist_files', 'hopen', 'hexists', 'hmkdir', 'hcopy']


@contextmanager  # type: ignore
def hopen(hdfs_path: str, mode: str = "r") -> IO[Any]:
    """
    Open a file on HDFS with a context manager.

    Args:
        mode (str): one of ["r", "w", "a", "wa"]
    """
    pipe = None
    if mode.startswith("r"):
        pipe = subprocess.Popen(
            "{} dfs -text {}".format(HADOOP_BIN, hdfs_path), shell=True, stdout=subprocess.PIPE)
        yield pipe.stdout
        pipe.stdout.close()  # type: ignore
        pipe.wait()
        return
    if mode == "wa" or mode == "a":
        pipe = subprocess.Popen(
            "{} dfs -appendToFile - {}".format(HADOOP_BIN, hdfs_path), shell=True, stdin=subprocess.PIPE)
        yield pipe.stdin
        pipe.stdin.close()  # type: ignore
        pipe.wait()
        return
    if mode.startswith("w"):
        pipe = subprocess.Popen(
            "{} dfs -put -f - {}".format(HADOOP_BIN, hdfs_path), shell=True, stdin=subprocess.PIPE)
        yield pipe.stdin
        pipe.stdin.close()  # type: ignore
        pipe.wait()
        return
    raise RuntimeError("unsupported io mode: {}".format(mode))


def hlist_files(folders: List[str]) -> List[str]:
    """List the files under each folder, transparently handling hdfs:// paths."""
    files = []
    for folder in folders:
        if folder.startswith('hdfs'):
            pipe = subprocess.Popen("{} dfs -ls {}".format(HADOOP_BIN, folder), shell=True,
                                    stdout=subprocess.PIPE)
            for line in pipe.stdout:  # type: ignore
                line = line.strip()
                # a valid entry looks like "drwxr-xr-x - user group 4 <path>";
                # skip header or blank lines with fewer than 5 columns
                if len(line.split()) < 5:
                    continue
                files.append(line.split()[-1].decode("utf8"))
            pipe.stdout.close()  # type: ignore
            pipe.wait()
        else:
            if os.path.isdir(folder):
                files.extend([os.path.join(folder, d) for d in os.listdir(folder)])
            elif os.path.isfile(folder):
                files.append(folder)
            else:
                print('Path {} is invalid'.format(folder))
                sys.stdout.flush()

    return files


def hexists(file_path: str) -> bool:
    """Check whether a path exists, on HDFS or the local filesystem."""
    if file_path.startswith('hdfs'):
        return os.system("{} dfs -test -e {}".format(HADOOP_BIN, file_path)) == 0
    return os.path.exists(file_path)


def hmkdir(file_path: str) -> bool:
    """Create a directory (and any missing parents), on HDFS or locally."""
    if file_path.startswith('hdfs'):
        os.system("{} dfs -mkdir -p {}".format(HADOOP_BIN, file_path))  # exist ok
    else:
        os.makedirs(file_path, exist_ok=True)  # mirror 'dfs -mkdir -p'
    return True


def hcopy(from_path: str, to_path: str) -> bool:
    """Copy a file between any combination of HDFS and local paths."""
    if to_path.startswith("hdfs"):
        if from_path.startswith("hdfs"):
            os.system("{} dfs -cp -f {} {}".format(HADOOP_BIN, from_path, to_path))
        else:
            os.system("{} dfs -copyFromLocal -f {} {}".format(HADOOP_BIN, from_path, to_path))
    else:
        if from_path.startswith("hdfs"):
            os.system("{} dfs -text {} > {}".format(HADOOP_BIN, from_path, to_path))
        else:
            shutil.copy(from_path, to_path)
    return True
--------------------------------------------------------------------------------
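A hedged sketch of the helpers above (the hdfs:// URIs are placeholders, and HADOOP_BIN must first point at a real hdfs binary):

    from utils.hdfs_io import hopen, hlist_files, hexists, hmkdir

    out = "hdfs://cluster/user/me/out"        # placeholder URI
    if not hexists(out):
        hmkdir(out)

    with hopen(out + "/log.txt", "w") as writer:
        writer.write(b"hello\n")              # the underlying pipe expects bytes

    for path in hlist_files([out]):
        print(path)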
/Method/utils/torch_io.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276)
# Github: https://github.com/zengyan-97/X-VLM
# Copyright (c) 2022, ByteDance Inc.
# All rights reserved.

import io
import torch

from .hdfs_io import hopen


def load(filepath: str, **kwargs):
    """Load a torch checkpoint from local disk or HDFS."""
    if not filepath.startswith("hdfs://"):
        return torch.load(filepath, **kwargs)
    with hopen(filepath, "rb") as reader:
        accessor = io.BytesIO(reader.read())
        state_dict = torch.load(accessor, **kwargs)
        del accessor
        return state_dict


def save(obj, filepath: str, **kwargs):
    """Save a torch object to local disk or HDFS."""
    if filepath.startswith("hdfs://"):
        with hopen(filepath, "wb") as writer:
            torch.save(obj, writer, **kwargs)
    else:
        torch.save(obj, filepath, **kwargs)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
torch -f https://download.pytorch.org/whl/cu118
torchvision -f https://download.pytorch.org/whl/cu118
torchaudio -f https://download.pytorch.org/whl/cu118

timm==0.4.9
transformers==4.12.5
ruamel_yaml
opencv-python-headless
scikit-image
matplotlib
chardet
charset_normalizer
PyOpenGL

pycocotools
pycocoevalcap
--------------------------------------------------------------------------------
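Closing the loop on torch_io.py above: a hedged round-trip sketch with a made-up local path (an hdfs:// path would stream through hopen instead of hitting the local branch):

    import torch
    from utils.torch_io import save, load

    state = {"w": torch.ones(2, 2)}
    save(state, "/tmp/demo.th")               # local path: plain torch.save
    restored = load("/tmp/demo.th")
    assert torch.equal(restored["w"], state["w"])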