├── Annotation_Pipeline
│   ├── Phase I
│   │   ├── Modality_Expansion.py
│   │   └── minigpt4
│   │       ├── __init__.py
│   │       ├── common
│   │       │   ├── __init__.py
│   │       │   ├── config.py
│   │       │   ├── dist_utils.py
│   │       │   ├── gradcam.py
│   │       │   ├── logger.py
│   │       │   ├── optims.py
│   │       │   ├── registry.py
│   │       │   └── utils.py
│   │       ├── configs
│   │       │   ├── datasets
│   │       │   │   ├── cc_sbu
│   │       │   │   │   ├── align.yaml
│   │       │   │   │   └── defaults.yaml
│   │       │   │   └── laion
│   │       │   │       └── defaults.yaml
│   │       │   ├── default.yaml
│   │       │   └── models
│   │       │       └── minigpt4.yaml
│   │       ├── conversation
│   │       │   ├── __init__.py
│   │       │   └── conversation.py
│   │       ├── datasets
│   │       │   ├── __init__.py
│   │       │   ├── builders
│   │       │   │   ├── __init__.py
│   │       │   │   ├── base_dataset_builder.py
│   │       │   │   └── image_text_pair_builder.py
│   │       │   ├── data_utils.py
│   │       │   └── datasets
│   │       │       ├── __init__.py
│   │       │       ├── base_dataset.py
│   │       │       ├── caption_datasets.py
│   │       │       ├── cc_sbu_dataset.py
│   │       │       ├── dataloader_utils.py
│   │       │       └── laion_dataset.py
│   │       ├── models
│   │       │   ├── Qformer.py
│   │       │   ├── __init__.py
│   │       │   ├── base_model.py
│   │       │   ├── blip2.py
│   │       │   ├── blip2_outputs.py
│   │       │   ├── eva_vit.py
│   │       │   ├── mini_gpt4.py
│   │       │   └── modeling_llama.py
│   │       ├── processors
│   │       │   ├── __init__.py
│   │       │   ├── base_processor.py
│   │       │   ├── blip_processors.py
│   │       │   └── randaugment.py
│   │       ├── runners
│   │       │   ├── __init__.py
│   │       │   └── runner_base.py
│   │       └── tasks
│   │           ├── __init__.py
│   │           ├── base_task.py
│   │           └── image_text_pretrain.py
│   └── Phase II
│       ├── GroundingDINO
│       │   ├── groundingdino
│       │   │   ├── _C.cpython-37m-x86_64-linux-gnu.so
│       │   │   ├── _C.cpython-38-x86_64-linux-gnu.so
│       │   │   ├── __init__.py
│       │   │   ├── config
│       │   │   │   ├── GroundingDINO_SwinB.py
│       │   │   │   └── GroundingDINO_SwinT_OGC.py
│       │   │   ├── datasets
│       │   │   │   ├── __init__.py
│       │   │   │   └── transforms.py
│       │   │   ├── models
│       │   │   │   ├── GroundingDINO
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── backbone
│       │   │   │   │   │   ├── __init__.py
│       │   │   │   │   │   ├── backbone.py
│       │   │   │   │   │   ├── position_encoding.py
│       │   │   │   │   │   └── swin_transformer.py
│       │   │   │   │   ├── bertwarper.py
│       │   │   │   │   ├── csrc
│       │   │   │   │   │   ├── MsDeformAttn
│       │   │   │   │   │   │   ├── ms_deform_attn.h
│       │   │   │   │   │   │   ├── ms_deform_attn_cpu.cpp
│       │   │   │   │   │   │   ├── ms_deform_attn_cpu.h
│       │   │   │   │   │   │   ├── ms_deform_attn_cuda.cu
│       │   │   │   │   │   │   ├── ms_deform_attn_cuda.h
│       │   │   │   │   │   │   └── ms_deform_im2col_cuda.cuh
│       │   │   │   │   │   ├── cuda_version.cu
│       │   │   │   │   │   └── vision.cpp
│       │   │   │   │   ├── fuse_modules.py
│       │   │   │   │   ├── groundingdino.py
│       │   │   │   │   ├── ms_deform_attn.py
│       │   │   │   │   ├── transformer.py
│       │   │   │   │   ├── transformer_vanilla.py
│       │   │   │   │   └── utils.py
│       │   │   │   ├── __init__.py
│       │   │   │   └── registry.py
│       │   │   ├── util
│       │   │   │   ├── __init__.py
│       │   │   │   ├── box_ops.py
│       │   │   │   ├── get_tokenlizer.py
│       │   │   │   ├── inference.py
│       │   │   │   ├── logger.py
│       │   │   │   ├── misc.py
│       │   │   │   ├── slconfig.py
│       │   │   │   ├── slio.py
│       │   │   │   ├── time_counter.py
│       │   │   │   ├── utils.py
│       │   │   │   ├── visualizer.py
│       │   │   │   └── vl_utils.py
│       │   │   └── version.py
│       │   └── setup.py
│       ├── generation.py
│       ├── generation_mask.py
│       └── segment_anything_
│           ├── segment_anything
│           │   ├── __init__.py
│           │   ├── automatic_mask_generator.py
│           │   ├── build_sam.py
│           │   ├── modeling
│           │   │   ├── __init__.py
│           │   │   ├── common.py
│           │   │   ├── image_encoder.py
│           │   │   ├── mask_decoder.py
│           │   │   ├── prompt_encoder.py
│           │   │   ├── sam.py
│           │   │   └── transformer.py
│           │   ├── predictor.py
│           │   └── utils
│           │       ├── __init__.py
│           │       ├── amg.py
│           │       ├── onnx.py
│           │       └── transforms.py
│           └── setup.py
├── Method
│   ├── accelerators
│   │   ├── __init__.py
│   │   ├── accelerator.py
│   │   └── apex_ddp_accelerator.py
│   ├── config.yaml
│   ├── configs
│   │   ├── config_bert.json
│   │   ├── config_clipvitB.json
│   │   ├── config_roberta.json
│   │   ├── config_swinB_224.json
│   │   ├── config_swinB_384.json
│   │   ├── config_swinB_480.json
│   │   ├── re_bbox.yaml
│   │   └── vlue-base-test
│   │       ├── Grounding_bbox.yaml
│   │       ├── Grounding_weakly.yaml
│   │       ├── NLVR.yaml
│   │       ├── Retrieval.yaml
│   │       ├── VQA.yaml
│   │       └── VQA_480.yaml
│   ├── dataset
│   │   ├── __init__.py
│   │   ├── coco_karpathy_dataset.py
│   │   ├── dist_dataset.py
│   │   ├── randaugment.py
│   │   ├── re_bbox_dataset.py
│   │   └── utils.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── box_ops.py
│   │   ├── clip_vit.py
│   │   ├── model_re_bbox.py
│   │   ├── swin_transformer.py
│   │   ├── test.py
│   │   ├── tokenization_bert.py
│   │   ├── tokenization_roberta.py
│   │   ├── vit.py
│   │   ├── xbert.py
│   │   ├── xroberta.py
│   │   └── xvlm.py
│   ├── optim.py
│   ├── output
│   │   └── all_output_eva
│   │       └── config.yaml
│   ├── re_bbox.py
│   ├── requirements.txt
│   ├── run.py
│   ├── scheduler.py
│   └── utils
│       ├── __init__.py
│       ├── checkpointer.py
│       ├── cider
│       │   └── pyciderevalcap
│       │       ├── __init__.py
│       │       ├── cider
│       │       │   ├── __init__.py
│       │       │   ├── cider.py
│       │       │   └── cider_scorer.py
│       │       └── ciderD
│       │           ├── __init__.py
│       │           ├── ciderD.py
│       │           └── ciderD_scorer.py
│       ├── hdfs_io.py
│       └── torch_io.py
├── README.md
└── requirements.txt

--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/__init__.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import os
import sys

from omegaconf import OmegaConf

from minigpt4.common.registry import registry

from minigpt4.datasets.builders import *
from minigpt4.models import *
from minigpt4.processors import *
from minigpt4.tasks import *


root_dir = os.path.dirname(os.path.abspath(__file__))
default_cfg = OmegaConf.load(os.path.join(root_dir, "configs/default.yaml"))

registry.register_path("library_root", root_dir)
repo_root = os.path.join(root_dir, "..")
registry.register_path("repo_root", repo_root)
cache_root = os.path.join(repo_root, default_cfg.env.cache_root)
registry.register_path("cache_root", cache_root)

registry.register("MAX_INT", sys.maxsize)
registry.register("SPLIT_NAMES", ["train", "val", "test"])
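
For reference, a minimal sketch of how the values registered above are read back elsewhere in the package. It assumes the LAVIS-style registry used here exposes get_path and get as the counterparts of register_path and register:

from minigpt4.common.registry import registry

cache_root = registry.get_path("cache_root")    # resolved relative to repo_root above
max_int = registry.get("MAX_INT")               # sys.maxsize
split_names = registry.get("SPLIT_NAMES")       # ["train", "val", "test"]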
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase I/minigpt4/common/__init__.py
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/common/dist_utils.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import datetime
import functools
import os

import torch
import torch.distributed as dist
import timm.models.hub as timm_hub


def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__

    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()


def get_rank():
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()


def is_main_process():
    return get_rank() == 0


def init_distributed_mode(args):
    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ["WORLD_SIZE"])
        args.gpu = int(os.environ["LOCAL_RANK"])
    elif "SLURM_PROCID" in os.environ:
        args.rank = int(os.environ["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print("Not using distributed mode")
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"
    print(
        "| distributed init (rank {}, world {}): {}".format(
            args.rank, args.world_size, args.dist_url
        ),
        flush=True,
    )
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
        timeout=datetime.timedelta(
            days=365
        ),  # allow auto-downloading and de-compressing
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)


def get_dist_info():
    if torch.__version__ < "1.0":
        initialized = dist._initialized
    else:
        initialized = dist.is_initialized()
    if initialized:
        rank = dist.get_rank()
        world_size = dist.get_world_size()
    else:  # non-distributed training
        rank = 0
        world_size = 1
    return rank, world_size


def main_process(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        rank, _ = get_dist_info()
        if rank == 0:
            return func(*args, **kwargs)

    return wrapper


def download_cached_file(url, check_hash=True, progress=False):
    """
    Download a file from a URL and cache it locally. If the file already exists, it is not downloaded again.
    If distributed, only the main process downloads the file, and the other processes wait for the file to be downloaded.
    """

    def get_cached_file_path():
        # a hack to sync the file path across processes
        parts = torch.hub.urlparse(url)
        filename = os.path.basename(parts.path)
        cached_file = os.path.join(timm_hub.get_cache_dir(), filename)

        return cached_file

    if is_main_process():
        timm_hub.download_cached_file(url, check_hash, progress)

    if is_dist_avail_and_initialized():
        dist.barrier()

    return get_cached_file_path()
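
A minimal sketch of how init_distributed_mode is typically driven. Assumptions: an argparse namespace carrying a dist_url field, and a launcher such as torchrun that sets the RANK, WORLD_SIZE, and LOCAL_RANK environment variables the function reads:

import argparse

from minigpt4.common.dist_utils import get_world_size, init_distributed_mode, is_main_process

parser = argparse.ArgumentParser()
parser.add_argument("--dist_url", default="env://")
args = parser.parse_args()

init_distributed_mode(args)  # fills in args.rank, args.world_size, args.gpu, args.distributed
if is_main_process():
    print("initialized", get_world_size(), "processes")

# typically launched as: torchrun --nproc_per_node=4 your_script.py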
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/common/gradcam.py:
--------------------------------------------------------------------------------
import numpy as np
from matplotlib import pyplot as plt
from scipy.ndimage import filters
from skimage import transform as skimage_transform


def getAttMap(img, attMap, blur=True, overlap=True):
    attMap -= attMap.min()
    if attMap.max() > 0:
        attMap /= attMap.max()
    attMap = skimage_transform.resize(attMap, (img.shape[:2]), order=3, mode="constant")
    if blur:
        attMap = filters.gaussian_filter(attMap, 0.02 * max(img.shape[:2]))
        attMap -= attMap.min()
        attMap /= attMap.max()
    cmap = plt.get_cmap("jet")
    attMapV = cmap(attMap)
    attMapV = np.delete(attMapV, 3, 2)
    if overlap:
        attMap = (
            1 * (1 - attMap**0.7).reshape(attMap.shape + (1,)) * img
            + (attMap**0.7).reshape(attMap.shape + (1,)) * attMapV
        )
    return attMap
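
A usage sketch for getAttMap. Assumptions: the image is an HxWx3 float array in [0, 1], the attention map is 2-D, and the random arrays below are stand-ins for real model outputs:

import numpy as np
from matplotlib import pyplot as plt

from minigpt4.common.gradcam import getAttMap

img = np.random.rand(224, 224, 3)   # stand-in for a normalized RGB image
att = np.random.rand(14, 14)        # stand-in for a 14x14 attention map
overlay = getAttMap(img, att, blur=True, overlap=True)

plt.imshow(overlay)
plt.axis("off")
plt.show()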
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/common/optims.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import math

from minigpt4.common.registry import registry


@registry.register_lr_scheduler("linear_warmup_step_lr")
class LinearWarmupStepLRScheduler:
    def __init__(
        self,
        optimizer,
        max_epoch,
        min_lr,
        init_lr,
        decay_rate=1,
        warmup_start_lr=-1,
        warmup_steps=0,
        **kwargs
    ):
        self.optimizer = optimizer

        self.max_epoch = max_epoch
        self.min_lr = min_lr

        self.decay_rate = decay_rate

        self.init_lr = init_lr
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr

    def step(self, cur_epoch, cur_step):
        if cur_epoch == 0:
            warmup_lr_schedule(
                step=cur_step,
                optimizer=self.optimizer,
                max_step=self.warmup_steps,
                init_lr=self.warmup_start_lr,
                max_lr=self.init_lr,
            )
        else:
            step_lr_schedule(
                epoch=cur_epoch,
                optimizer=self.optimizer,
                init_lr=self.init_lr,
                min_lr=self.min_lr,
                decay_rate=self.decay_rate,
            )


@registry.register_lr_scheduler("linear_warmup_cosine_lr")
class LinearWarmupCosineLRScheduler:
    def __init__(
        self,
        optimizer,
        max_epoch,
        iters_per_epoch,
        min_lr,
        init_lr,
        warmup_steps=0,
        warmup_start_lr=-1,
        **kwargs
    ):
        self.optimizer = optimizer

        self.max_epoch = max_epoch
        self.iters_per_epoch = iters_per_epoch
        self.min_lr = min_lr

        self.init_lr = init_lr
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr

    def step(self, cur_epoch, cur_step):
        total_cur_step = cur_epoch * self.iters_per_epoch + cur_step
        if total_cur_step < self.warmup_steps:
            warmup_lr_schedule(
                step=cur_step,
                optimizer=self.optimizer,
                max_step=self.warmup_steps,
                init_lr=self.warmup_start_lr,
                max_lr=self.init_lr,
            )
        else:
            cosine_lr_schedule(
                epoch=total_cur_step,
                optimizer=self.optimizer,
                max_epoch=self.max_epoch * self.iters_per_epoch,
                init_lr=self.init_lr,
                min_lr=self.min_lr,
            )


def cosine_lr_schedule(optimizer, epoch, max_epoch, init_lr, min_lr):
    """Decay the learning rate"""
    lr = (init_lr - min_lr) * 0.5 * (
        1.0 + math.cos(math.pi * epoch / max_epoch)
    ) + min_lr
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr


def warmup_lr_schedule(optimizer, step, max_step, init_lr, max_lr):
    """Warmup the learning rate"""
    lr = min(max_lr, init_lr + (max_lr - init_lr) * step / max(max_step, 1))
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr


def step_lr_schedule(optimizer, epoch, init_lr, min_lr, decay_rate):
    """Decay the learning rate"""
    lr = max(min_lr, init_lr * (decay_rate**epoch))
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr
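
A sketch of stepping LinearWarmupCosineLRScheduler once per training iteration, matching the cur_epoch/cur_step signature above. The model, optimizer, and loop bounds are placeholders:

import torch

from minigpt4.common.optims import LinearWarmupCosineLRScheduler

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scheduler = LinearWarmupCosineLRScheduler(
    optimizer, max_epoch=10, iters_per_epoch=100,
    min_lr=1e-5, init_lr=1e-4, warmup_steps=200, warmup_start_lr=1e-6,
)

for epoch in range(10):
    for it in range(100):
        scheduler.step(cur_epoch=epoch, cur_step=it)  # writes param_group["lr"]
        # ... forward / backward ...
        optimizer.step()
        optimizer.zero_grad()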
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/datasets/cc_sbu/align.yaml:
--------------------------------------------------------------------------------
datasets:
  cc_sbu_align:
    data_type: images
    build_info:
      storage: /path/to/cc_sbu_align/
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/datasets/cc_sbu/defaults.yaml:
--------------------------------------------------------------------------------
datasets:
  cc_sbu:
    data_type: images
    build_info:
      storage: /path/to/cc_sbu_dataset/{00000..01255}.tar
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/datasets/laion/defaults.yaml:
--------------------------------------------------------------------------------
datasets:
  laion:
    data_type: images
    build_info:
      storage: /path/to/laion_dataset/{00000..10488}.tar
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/default.yaml:
--------------------------------------------------------------------------------
env:
  # For default users
  # cache_root: "cache"
  # For internal use with persistent storage
  cache_root: "/export/home/.cache/minigpt4"
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/configs/models/minigpt4.yaml:
--------------------------------------------------------------------------------
model:
  arch: mini_gpt4

  # vit encoder
  image_size: 224
  drop_path_rate: 0
  use_grad_checkpoint: False
  vit_precision: "fp16"
  freeze_vit: True
  freeze_qformer: True

  # Q-Former
  num_query_token: 32

  # Vicuna
  llama_model: "/storage_fast/mchu/Multi-model/mini/MiniGPT-4/working"

  # generation configs
  prompt: ""

preprocess:
  vis_processor:
    train:
      name: "blip2_image_train"
      image_size: 224
    eval:
      name: "blip2_image_eval"
      image_size: 224
  text_processor:
    train:
      name: "blip_caption"
    eval:
      name: "blip_caption"
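
A quick sketch of reading one of these configs with OmegaConf, which the package already uses in its top-level __init__.py; the relative path is illustrative:

from omegaconf import OmegaConf

cfg = OmegaConf.load("minigpt4/configs/models/minigpt4.yaml")
print(cfg.model.arch)           # "mini_gpt4"
print(cfg.model.llama_model)    # filesystem path to the Vicuna weights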
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/conversation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase I/minigpt4/conversation/__init__.py
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase I/minigpt4/datasets/__init__.py
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/builders/__init__.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

from minigpt4.datasets.builders.base_dataset_builder import load_dataset_config
from minigpt4.datasets.builders.image_text_pair_builder import (
    CCSBUBuilder,
    LaionBuilder,
    CCSBUAlignBuilder
)
from minigpt4.common.registry import registry

__all__ = [
    "CCSBUBuilder",
    "LaionBuilder",
    "CCSBUAlignBuilder"
]


def load_dataset(name, cfg_path=None, vis_path=None, data_type=None):
    """
    Example

    >>> dataset = load_dataset("coco_caption", cfg=None)
    >>> splits = dataset.keys()
    >>> print([len(dataset[split]) for split in splits])

    """
    if cfg_path is None:
        cfg = None
    else:
        cfg = load_dataset_config(cfg_path)

    try:
        builder = registry.get_builder_class(name)(cfg)
    except TypeError:
        print(
            f"Dataset {name} not found. Available datasets:\n"
            + ", ".join([str(k) for k in dataset_zoo.get_names()])
        )
        exit(1)

    if vis_path is not None:
        if data_type is None:
            # use default data type in the config
            data_type = builder.config.data_type

        assert (
            data_type in builder.config.build_info
        ), f"Invalid data_type {data_type} for {name}."

        builder.config.build_info.get(data_type).storage = vis_path

    dataset = builder.build_datasets()
    return dataset


class DatasetZoo:
    def __init__(self) -> None:
        self.dataset_zoo = {
            k: list(v.DATASET_CONFIG_DICT.keys())
            for k, v in sorted(registry.mapping["builder_name_mapping"].items())
        }

    def get_names(self):
        return list(self.dataset_zoo.keys())


dataset_zoo = DatasetZoo()
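
A usage sketch for load_dataset. It assumes the builders registered in image_text_pair_builder.py (below) have been imported, which the star-imports in minigpt4/__init__.py take care of, and that the aligned data exists on disk:

from minigpt4.datasets.builders import load_dataset

dataset = load_dataset("cc_sbu_align")   # cfg_path=None falls back to the builder's default config
print(list(dataset.keys()))              # e.g. ["train"]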
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/builders/image_text_pair_builder.py:
--------------------------------------------------------------------------------
import os
import logging
import warnings

from minigpt4.common.registry import registry
from minigpt4.datasets.builders.base_dataset_builder import BaseDatasetBuilder
from minigpt4.datasets.datasets.laion_dataset import LaionDataset
from minigpt4.datasets.datasets.cc_sbu_dataset import CCSBUDataset, CCSBUAlignDataset


@registry.register_builder("cc_sbu")
class CCSBUBuilder(BaseDatasetBuilder):
    train_dataset_cls = CCSBUDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/cc_sbu/defaults.yaml"}

    def _download_ann(self):
        pass

    def _download_vis(self):
        pass

    def build(self):
        self.build_processors()

        build_info = self.config.build_info

        datasets = dict()
        split = "train"

        # create datasets
        # [NOTE] return inner_datasets (wds.DataPipeline)
        dataset_cls = self.train_dataset_cls
        datasets[split] = dataset_cls(
            vis_processor=self.vis_processors[split],
            text_processor=self.text_processors[split],
            location=build_info.storage,
        ).inner_dataset

        return datasets


@registry.register_builder("laion")
class LaionBuilder(BaseDatasetBuilder):
    train_dataset_cls = LaionDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/laion/defaults.yaml"}

    def _download_ann(self):
        pass

    def _download_vis(self):
        pass

    def build(self):
        self.build_processors()

        build_info = self.config.build_info

        datasets = dict()
        split = "train"

        # create datasets
        # [NOTE] return inner_datasets (wds.DataPipeline)
        dataset_cls = self.train_dataset_cls
        datasets[split] = dataset_cls(
            vis_processor=self.vis_processors[split],
            text_processor=self.text_processors[split],
            location=build_info.storage,
        ).inner_dataset

        return datasets


@registry.register_builder("cc_sbu_align")
class CCSBUAlignBuilder(BaseDatasetBuilder):
    train_dataset_cls = CCSBUAlignDataset

    DATASET_CONFIG_DICT = {
        "default": "configs/datasets/cc_sbu/align.yaml",
    }

    def build_datasets(self):
        # at this point, all the annotations and image/videos should be all downloaded to the specified locations.
        logging.info("Building datasets...")
        self.build_processors()

        build_info = self.config.build_info
        storage_path = build_info.storage

        datasets = dict()

        if not os.path.exists(storage_path):
            warnings.warn("storage path {} does not exist.".format(storage_path))

        # create datasets
        dataset_cls = self.train_dataset_cls
        datasets['train'] = dataset_cls(
            vis_processor=self.vis_processors["train"],
            text_processor=self.text_processors["train"],
            ann_paths=[os.path.join(storage_path, 'filter_cap.json')],
            vis_root=os.path.join(storage_path, 'image'),
        )

        return datasets
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/__init__.py
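
Equivalently, a builder defined above can be driven directly through the registry (a sketch; it assumes, as load_dataset does, that a builder accepts a None config and then falls back to its DATASET_CONFIG_DICT default):

from minigpt4.common.registry import registry

builder_cls = registry.get_builder_class("cc_sbu_align")
datasets = builder_cls(None).build_datasets()   # {"train": CCSBUAlignDataset}; needs the data on disk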
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/base_dataset.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import json
from typing import Iterable

from torch.utils.data import Dataset, ConcatDataset
from torch.utils.data.dataloader import default_collate


class BaseDataset(Dataset):
    def __init__(
        self, vis_processor=None, text_processor=None, vis_root=None, ann_paths=[]
    ):
        """
        vis_root (string): Root directory of images (e.g. coco/images/)
        ann_root (string): directory to store the annotation file
        """
        self.vis_root = vis_root

        self.annotation = []
        for ann_path in ann_paths:
            self.annotation.extend(json.load(open(ann_path, "r"))['annotations'])

        self.vis_processor = vis_processor
        self.text_processor = text_processor

        self._add_instance_ids()

    def __len__(self):
        return len(self.annotation)

    def collater(self, samples):
        return default_collate(samples)

    def set_processors(self, vis_processor, text_processor):
        self.vis_processor = vis_processor
        self.text_processor = text_processor

    def _add_instance_ids(self, key="instance_id"):
        for idx, ann in enumerate(self.annotation):
            ann[key] = str(idx)


class ConcatDataset(ConcatDataset):
    def __init__(self, datasets: Iterable[Dataset]) -> None:
        super().__init__(datasets)

    def collater(self, samples):
        # TODO For now only supports datasets with same underlying collater implementations

        all_keys = set()
        for s in samples:
            all_keys.update(s)

        shared_keys = all_keys
        for s in samples:
            shared_keys = shared_keys & set(s.keys())

        samples_shared_keys = []
        for s in samples:
            samples_shared_keys.append({k: s[k] for k in s.keys() if k in shared_keys})

        return self.datasets[0].collater(samples_shared_keys)
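
A small illustration of the shared-key collation implemented above. The toy dataset is a stand-in for two datasets whose samples expose different key sets:

import torch
from torch.utils.data.dataloader import default_collate

from minigpt4.datasets.datasets.base_dataset import ConcatDataset

class _Toy(torch.utils.data.Dataset):
    def __init__(self, rows):
        self.rows = rows
    def __len__(self):
        return len(self.rows)
    def __getitem__(self, i):
        return self.rows[i]
    def collater(self, samples):
        return default_collate(samples)

cat = ConcatDataset([_Toy([{"image": torch.zeros(3), "image_id": 0}]),
                     _Toy([{"image": torch.ones(3)}])])
batch = cat.collater([cat[0], cat[1]])
print(batch.keys())   # only the shared key survives: dict_keys(['image'])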
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/caption_datasets.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import os
from collections import OrderedDict

from minigpt4.datasets.datasets.base_dataset import BaseDataset
from PIL import Image


class __DisplMixin:
    def displ_item(self, index):
        sample, ann = self.__getitem__(index), self.annotation[index]

        return OrderedDict(
            {
                "file": ann["image"],
                "caption": ann["caption"],
                "image": sample["image"],
            }
        )


class CaptionDataset(BaseDataset, __DisplMixin):
    def __init__(self, vis_processor, text_processor, vis_root, ann_paths):
        """
        vis_root (string): Root directory of images (e.g. coco/images/)
        ann_root (string): directory to store the annotation file
        """
        super().__init__(vis_processor, text_processor, vis_root, ann_paths)

        self.img_ids = {}
        n = 0
        for ann in self.annotation:
            img_id = ann["image_id"]
            if img_id not in self.img_ids.keys():
                self.img_ids[img_id] = n
                n += 1
    def __getitem__(self, index):

        # TODO this assumes image input, not general enough
        ann = self.annotation[index]

        img_file = '{:0>12}.jpg'.format(ann["image_id"])
        image_path = os.path.join(self.vis_root, img_file)
        image = Image.open(image_path).convert("RGB")

        image = self.vis_processor(image)
        caption = self.text_processor(ann["caption"])

        return {
            "image": image,
            "text_input": caption,
            "image_id": self.img_ids[ann["image_id"]],
        }


class CaptionEvalDataset(BaseDataset, __DisplMixin):
    def __init__(self, vis_processor, text_processor, vis_root, ann_paths):
        """
        vis_root (string): Root directory of images (e.g. coco/images/)
        ann_root (string): directory to store the annotation file
        split (string): val or test
        """
        super().__init__(vis_processor, text_processor, vis_root, ann_paths)

    def __getitem__(self, index):

        ann = self.annotation[index]

        image_path = os.path.join(self.vis_root, ann["image"])
        image = Image.open(image_path).convert("RGB")

        image = self.vis_processor(image)

        return {
            "image": image,
            "image_id": ann["image_id"],
            "instance_id": ann["instance_id"],
        }
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/cc_sbu_dataset.py:
--------------------------------------------------------------------------------
import os
from PIL import Image
import webdataset as wds
from minigpt4.datasets.datasets.base_dataset import BaseDataset
from minigpt4.datasets.datasets.caption_datasets import CaptionDataset


class CCSBUDataset(BaseDataset):
    def __init__(self, vis_processor, text_processor, location):
        super().__init__(vis_processor=vis_processor, text_processor=text_processor)

        self.inner_dataset = wds.DataPipeline(
            wds.ResampledShards(location),
            wds.tarfile_to_samples(handler=wds.warn_and_continue),
            wds.shuffle(1000, handler=wds.warn_and_continue),
            wds.decode("pilrgb", handler=wds.warn_and_continue),
            wds.to_tuple("jpg", "json", handler=wds.warn_and_continue),
            wds.map_tuple(self.vis_processor, handler=wds.warn_and_continue),
            wds.map(self.to_dict, handler=wds.warn_and_continue),
        )

    def to_dict(self, sample):
        return {
            "image": sample[0],
            "text_input": self.text_processor(sample[1]["caption"]),
        }


class CCSBUAlignDataset(CaptionDataset):

    def __getitem__(self, index):

        # TODO this assumes image input, not general enough
        ann = self.annotation[index]

        img_file = '{}.jpg'.format(ann["image_id"])
        image_path = os.path.join(self.vis_root, img_file)
        image = Image.open(image_path).convert("RGB")

        image = self.vis_processor(image)
        caption = ann["caption"]

        return {
            "image": image,
            "text_input": caption,
            "image_id": self.img_ids[ann["image_id"]],
        }
--------------------------------------------------------------------------------
/Annotation_Pipeline/Phase I/minigpt4/datasets/datasets/laion_dataset.py:
--------------------------------------------------------------------------------
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import webdataset as wds
from minigpt4.datasets.datasets.base_dataset import BaseDataset


class LaionDataset(BaseDataset):
    def __init__(self, vis_processor, text_processor, location):
        super().__init__(vis_processor=vis_processor, text_processor=text_processor)

        self.inner_dataset = wds.DataPipeline(
            wds.ResampledShards(location),
            wds.tarfile_to_samples(handler=wds.warn_and_continue),
            wds.shuffle(1000, handler=wds.warn_and_continue),
            wds.decode("pilrgb", handler=wds.warn_and_continue),
            wds.to_tuple("jpg", "json", handler=wds.warn_and_continue),
            wds.map_tuple(self.vis_processor, handler=wds.warn_and_continue),
            wds.map(self.to_dict, handler=wds.warn_and_continue),
        )

    def to_dict(self, sample):
        return {
            "image": sample[0],
            "text_input": self.text_processor(sample[1]["caption"]),
        }
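
A sketch of consuming one of these webdataset pipelines. Assumptions: the shard pattern is the placeholder from the config above, the vis_processor returns a tensor so the default collate works, and batching is left to the DataLoader because wds.DataPipeline yields one sample at a time:

from torch.utils.data import DataLoader
from torchvision import transforms

from minigpt4.datasets.datasets.laion_dataset import LaionDataset

to_tensor = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
ds = LaionDataset(vis_processor=to_tensor, text_processor=lambda s: s,
                  location="/path/to/laion_dataset/{00000..10488}.tar")

loader = DataLoader(ds.inner_dataset, batch_size=32, num_workers=4)
batch = next(iter(loader))
images, texts = batch["image"], batch["text_input"]   # (32, 3, 224, 224) tensor, list of 32 strings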
/Annotation_Pipeline/Phase I/minigpt4/models/blip2_outputs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from dataclasses import dataclass 9 | from typing import Optional 10 | 11 | import torch 12 | from transformers.modeling_outputs import ( 13 | ModelOutput, 14 | BaseModelOutputWithPoolingAndCrossAttentions, 15 | CausalLMOutputWithCrossAttentions, 16 | ) 17 | 18 | 19 | @dataclass 20 | class BlipSimilarity(ModelOutput): 21 | sim_i2t: torch.FloatTensor = None 22 | sim_t2i: torch.FloatTensor = None 23 | 24 | sim_i2t_m: Optional[torch.FloatTensor] = None 25 | sim_t2i_m: Optional[torch.FloatTensor] = None 26 | 27 | sim_i2t_targets: Optional[torch.FloatTensor] = None 28 | sim_t2i_targets: Optional[torch.FloatTensor] = None 29 | 30 | 31 | @dataclass 32 | class BlipIntermediateOutput(ModelOutput): 33 | """ 34 | Data class for intermediate outputs of BLIP models. 35 | 36 | image_embeds (torch.FloatTensor): Image embeddings, shape (batch_size, num_patches, embed_dim). 37 | text_embeds (torch.FloatTensor): Text embeddings, shape (batch_size, seq_len, embed_dim). 38 | 39 | image_embeds_m (torch.FloatTensor): Image embeddings from momentum visual encoder, shape (batch_size, num_patches, embed_dim). 40 | text_embeds_m (torch.FloatTensor): Text embeddings from momentum text encoder, shape (batch_size, seq_len, embed_dim). 41 | 42 | encoder_output (BaseModelOutputWithPoolingAndCrossAttentions): output from the image-grounded text encoder. 43 | encoder_output_neg (BaseModelOutputWithPoolingAndCrossAttentions): output from the image-grounded text encoder for negative pairs. 44 | 45 | decoder_output (CausalLMOutputWithCrossAttentions): output from the image-grounded text decoder. 46 | decoder_labels (torch.LongTensor): labels for the captioning loss. 47 | 48 | itm_logits (torch.FloatTensor): logits for the image-text matching loss, shape (batch_size * 3, 2). 49 | itm_labels (torch.LongTensor): labels for the image-text matching loss, shape (batch_size * 3,) 50 | 51 | """ 52 | 53 | # uni-modal features 54 | image_embeds: torch.FloatTensor = None 55 | text_embeds: Optional[torch.FloatTensor] = None 56 | 57 | image_embeds_m: Optional[torch.FloatTensor] = None 58 | text_embeds_m: Optional[torch.FloatTensor] = None 59 | 60 | # intermediate outputs of multimodal encoder 61 | encoder_output: Optional[BaseModelOutputWithPoolingAndCrossAttentions] = None 62 | encoder_output_neg: Optional[BaseModelOutputWithPoolingAndCrossAttentions] = None 63 | 64 | itm_logits: Optional[torch.FloatTensor] = None 65 | itm_labels: Optional[torch.LongTensor] = None 66 | 67 | # intermediate outputs of multimodal decoder 68 | decoder_output: Optional[CausalLMOutputWithCrossAttentions] = None 69 | decoder_labels: Optional[torch.LongTensor] = None 70 | 71 | 72 | @dataclass 73 | class BlipOutput(ModelOutput): 74 | # some finetuned models (e.g. BlipVQA) do not compute similarity, thus optional.
75 | sims: Optional[BlipSimilarity] = None 76 | 77 | intermediate_output: BlipIntermediateOutput = None 78 | 79 | loss: Optional[torch.FloatTensor] = None 80 | 81 | loss_itc: Optional[torch.FloatTensor] = None 82 | 83 | loss_itm: Optional[torch.FloatTensor] = None 84 | 85 | loss_lm: Optional[torch.FloatTensor] = None 86 | 87 | 88 | @dataclass 89 | class BlipOutputFeatures(ModelOutput): 90 | """ 91 | Data class of features from BlipFeatureExtractor. 92 | 93 | Args: 94 | image_embeds: (torch.FloatTensor) of shape (batch_size, num_patches+1, embed_dim), optional 95 | image_features: (torch.FloatTensor) of shape (batch_size, num_patches+1, feature_dim), optional 96 | text_embeds: (torch.FloatTensor) of shape (batch_size, sequence_length+1, embed_dim), optional 97 | text_features: (torch.FloatTensor) of shape (batch_size, sequence_length+1, feature_dim), optional 98 | 99 | The first embedding or feature is for the [CLS] token. 100 | 101 | Features are obtained by projecting the corresponding embedding into a normalized low-dimensional space. 102 | """ 103 | 104 | image_embeds: Optional[torch.FloatTensor] = None 105 | image_embeds_proj: Optional[torch.FloatTensor] = None 106 | 107 | text_embeds: Optional[torch.FloatTensor] = None 108 | text_embeds_proj: Optional[torch.FloatTensor] = None 109 | 110 | multimodal_embeds: Optional[torch.FloatTensor] = None 111 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase I/minigpt4/processors/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from minigpt4.processors.base_processor import BaseProcessor 9 | from minigpt4.processors.blip_processors import ( 10 | Blip2ImageTrainProcessor, 11 | Blip2ImageEvalProcessor, 12 | BlipCaptionProcessor, 13 | ) 14 | 15 | from minigpt4.common.registry import registry 16 | 17 | __all__ = [ 18 | "BaseProcessor", 19 | "Blip2ImageTrainProcessor", 20 | "Blip2ImageEvalProcessor", 21 | "BlipCaptionProcessor", 22 | ] 23 | 24 | 25 | def load_processor(name, cfg=None): 26 | """ 27 | Example 28 | 29 | >>> processor = load_processor("alpro_video_train", cfg=None) 30 | """ 31 | processor = registry.get_processor_class(name).from_config(cfg) 32 | 33 | return processor 34 | --------------------------------------------------------------------------------
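load_processor resolves names through the shared registry; a short sketch, assuming "blip_caption" is the name under which BlipCaptionProcessor registers itself in blip_processors.py (not shown in this section):

from minigpt4.processors import load_processor

text_processor = load_processor("blip_caption", cfg=None)  # assumed registered name
print(text_processor("A bird sits on a branch."))

--------------------------------------------------------------------------------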
/Annotation_Pipeline/Phase I/minigpt4/processors/base_processor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved.
4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from omegaconf import OmegaConf 9 | 10 | 11 | class BaseProcessor: 12 | def __init__(self): 13 | self.transform = lambda x: x 14 | return 15 | 16 | def __call__(self, item): 17 | return self.transform(item) 18 | 19 | @classmethod 20 | def from_config(cls, cfg=None): 21 | return cls() 22 | 23 | def build(self, **kwargs): 24 | cfg = OmegaConf.create(kwargs) 25 | 26 | return self.from_config(cfg) 27 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase I/minigpt4/runners/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from minigpt4.runners.runner_base import RunnerBase 9 | 10 | __all__ = ["RunnerBase"] 11 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase I/minigpt4/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from minigpt4.common.registry import registry 9 | from minigpt4.tasks.base_task import BaseTask 10 | from minigpt4.tasks.image_text_pretrain import ImageTextPretrainTask 11 | 12 | 13 | def setup_task(cfg): 14 | assert "task" in cfg.run_cfg, "Task name must be provided."
15 | 16 | task_name = cfg.run_cfg.task 17 | task = registry.get_task_class(task_name).setup_task(cfg=cfg) 18 | assert task is not None, "Task {} not properly registered.".format(task_name) 19 | 20 | return task 21 | 22 | 23 | __all__ = [ 24 | "BaseTask", 25 | "ImageTextPretrainTask", 26 | ] 27 | --------------------------------------------------------------------------------
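setup_task only touches cfg.run_cfg.task, so any config object exposing that field will do; a minimal sketch using the task name registered below in image_text_pretrain.py (the Cfg stand-in is hypothetical, not the project's real config class):

from omegaconf import OmegaConf
from minigpt4.tasks import setup_task

class Cfg:
    # Stand-in for the full run config; only run_cfg.task is read here.
    run_cfg = OmegaConf.create({"task": "image_text_pretrain"})

task = setup_task(Cfg())  # resolves ImageTextPretrainTask via the registry

--------------------------------------------------------------------------------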
/Annotation_Pipeline/Phase I/minigpt4/tasks/image_text_pretrain.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022, salesforce.com, inc. 3 | All rights reserved. 4 | SPDX-License-Identifier: BSD-3-Clause 5 | For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause 6 | """ 7 | 8 | from minigpt4.common.registry import registry 9 | from minigpt4.tasks.base_task import BaseTask 10 | 11 | 12 | @registry.register_task("image_text_pretrain") 13 | class ImageTextPretrainTask(BaseTask): 14 | def __init__(self): 15 | super().__init__() 16 | 17 | def evaluation(self, model, data_loader, cuda_enabled=True): 18 | pass 19 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/__init__.py -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py: -------------------------------------------------------------------------------- 1 | batch_size = 1 2 | modelname = "groundingdino" 3 | backbone = "swin_B_384_22k" 4 | position_embedding = "sine" 5 | pe_temperatureH = 20 6 | pe_temperatureW = 20 7 | return_interm_indices = [1, 2, 3] 8 | backbone_freeze_keywords = None 9 | enc_layers = 6 10 | dec_layers = 6 11 | pre_norm = False 12 | dim_feedforward = 2048 13 | hidden_dim = 256 14 | dropout = 0.0 15 | nheads = 8 16 | num_queries = 900 17 | query_dim = 4 18 | num_patterns = 0 19 | num_feature_levels = 4 20 | enc_n_points = 4 21 | dec_n_points = 4 22 | two_stage_type = "standard" 23 | 
two_stage_bbox_embed_share = False 24 | two_stage_class_embed_share = False 25 | transformer_activation = "relu" 26 | dec_pred_bbox_embed_share = True 27 | dn_box_noise_scale = 1.0 28 | dn_label_noise_ratio = 0.5 29 | dn_label_coef = 1.0 30 | dn_bbox_coef = 1.0 31 | embed_init_tgt = True 32 | dn_labelbook_size = 2000 33 | max_text_len = 256 34 | text_encoder_type = "bert-base-uncased" 35 | use_text_enhancer = True 36 | use_fusion_layer = True 37 | use_checkpoint = True 38 | use_transformer_ckpt = True 39 | use_text_cross_attention = True 40 | text_dropout = 0.0 41 | fusion_dropout = 0.0 42 | fusion_droppath = 0.1 43 | sub_sentence_present = True 44 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py: -------------------------------------------------------------------------------- 1 | batch_size = 1 2 | modelname = "groundingdino" 3 | backbone = "swin_T_224_1k" 4 | position_embedding = "sine" 5 | pe_temperatureH = 20 6 | pe_temperatureW = 20 7 | return_interm_indices = [1, 2, 3] 8 | backbone_freeze_keywords = None 9 | enc_layers = 6 10 | dec_layers = 6 11 | pre_norm = False 12 | dim_feedforward = 2048 13 | hidden_dim = 256 14 | dropout = 0.0 15 | nheads = 8 16 | num_queries = 900 17 | query_dim = 4 18 | num_patterns = 0 19 | num_feature_levels = 4 20 | enc_n_points = 4 21 | dec_n_points = 4 22 | two_stage_type = "standard" 23 | two_stage_bbox_embed_share = False 24 | two_stage_class_embed_share = False 25 | transformer_activation = "relu" 26 | dec_pred_bbox_embed_share = True 27 | dn_box_noise_scale = 1.0 28 | dn_label_noise_ratio = 0.5 29 | dn_label_coef = 1.0 30 | dn_bbox_coef = 1.0 31 | embed_init_tgt = True 32 | dn_labelbook_size = 2000 33 | max_text_len = 256 34 | text_encoder_type = "bert-base-uncased" 35 | use_text_enhancer = True 36 | use_fusion_layer = True 37 | use_checkpoint = True 38 | use_transformer_ckpt = True 39 | use_text_cross_attention = True 40 | text_dropout = 0.0 41 | fusion_dropout = 0.0 42 | fusion_droppath = 0.1 43 | sub_sentence_present = True 44 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/datasets/__init__.py --------------------------------------------------------------------------------
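Both config files are flat Python variable listings consumed as an args namespace; a sketch of turning one into a model, assuming the mmcv-style SLConfig.fromfile suggested by util/slconfig.py and a placeholder checkpoint path:

import torch
from groundingdino.util.slconfig import SLConfig
from groundingdino.models import build_model

args = SLConfig.fromfile("groundingdino/config/GroundingDINO_SwinT_OGC.py")
model = build_model(args)  # dispatches on args.modelname via MODULE_BUILD_FUNCS
checkpoint = torch.load("groundingdino_swint_ogc.pth", map_location="cpu")  # placeholder
model.load_state_dict(checkpoint["model"], strict=False)
model.eval()

--------------------------------------------------------------------------------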
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Conditional DETR 8 | # Copyright (c) 2021 Microsoft. All Rights Reserved. 9 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 10 | # ------------------------------------------------------------------------ 11 | # Copied from DETR (https://github.com/facebookresearch/detr) 12 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
13 | # ------------------------------------------------------------------------ 14 | 15 | from .groundingdino import build_groundingdino 16 | --------------------------------------------------------------------------------
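With build_groundingdino exported here, end-to-end grounding is typically driven through the helpers in groundingdino/util/inference.py; a sketch assuming those helpers match the upstream GroundingDINO API, with placeholder paths and thresholds:

from groundingdino.util.inference import load_model, load_image, predict

model = load_model(
    "groundingdino/config/GroundingDINO_SwinT_OGC.py",
    "groundingdino_swint_ogc.pth",  # placeholder checkpoint path
)
image_source, image = load_image("example.jpg")  # placeholder image
boxes, logits, phrases = predict(
    model=model,
    image=image,
    caption="a building with a red roof",
    box_threshold=0.35,
    text_threshold=0.25,
)

--------------------------------------------------------------------------------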
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_backbone 2 | --------------------------------------------------------------------------------
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | 13 | #include "ms_deform_attn_cpu.h" 14 | 15 | #ifdef WITH_CUDA 16 | #include "ms_deform_attn_cuda.h" 17 | #endif 18 | 19 | namespace groundingdino { 20 | 21 | at::Tensor 22 | ms_deform_attn_forward( 23 | const at::Tensor &value, 24 | const at::Tensor &spatial_shapes, 25 | const at::Tensor &level_start_index, 26 | const at::Tensor &sampling_loc, 27 | const at::Tensor &attn_weight, 28 | const int im2col_step) 29 | { 30 | if (value.type().is_cuda()) 31 | { 32 | #ifdef WITH_CUDA 33 | return ms_deform_attn_cuda_forward( 34 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 35 | #else 36 | AT_ERROR("Not compiled with GPU support"); 37 | #endif 38 | } 39 | AT_ERROR("Not implemented on the CPU"); 40 | } 41 | 42 | std::vector<at::Tensor> 43 | ms_deform_attn_backward( 44 | const at::Tensor &value, 45 | const at::Tensor &spatial_shapes, 46 | const at::Tensor &level_start_index, 47 | const at::Tensor &sampling_loc, 48 | const at::Tensor &attn_weight, 49 | const at::Tensor &grad_output, 50 | const int im2col_step) 51 | { 52 | if (value.type().is_cuda()) 53 | { 54 | #ifdef WITH_CUDA 55 | return ms_deform_attn_cuda_backward( 56 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 57 | #else 58 | AT_ERROR("Not compiled with GPU support"); 59 | #endif 60 | } 61 | AT_ERROR("Not implemented on the CPU"); 62 | } 63 | 64 | } // namespace groundingdino -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include <vector> 12 | 13 | #include <ATen/ATen.h> 14 | #include <ATen/cuda/CUDAContext.h> 15 | 16 | namespace groundingdino { 17 | 18 | at::Tensor 19 | ms_deform_attn_cpu_forward( 20 | const at::Tensor &value, 21 | const at::Tensor &spatial_shapes, 22 | const at::Tensor &level_start_index, 23 | const at::Tensor &sampling_loc, 24 | const at::Tensor &attn_weight, 25 | const int im2col_step) 26 | { 27 | AT_ERROR("Not implement on cpu"); 28 | } 29 | 30 | std::vector<at::Tensor> 31 | ms_deform_attn_cpu_backward( 32 | const at::Tensor &value, 33 | const at::Tensor &spatial_shapes, 34 | const at::Tensor &level_start_index, 35 | const at::Tensor &sampling_loc, 36 | const at::Tensor &attn_weight, 37 | const at::Tensor &grad_output, 38 | const int im2col_step) 39 | { 40 | AT_ERROR("Not implement on cpu"); 41 | } 42 | 43 | } // namespace groundingdino 44 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | namespace groundingdino { 15 | 16 | at::Tensor 17 | ms_deform_attn_cpu_forward( 18 | const at::Tensor &value, 19 | const at::Tensor &spatial_shapes, 20 | const at::Tensor &level_start_index, 21 | const at::Tensor &sampling_loc, 22 | const at::Tensor &attn_weight, 23 | const int im2col_step); 24 | 25 | std::vector<at::Tensor> 26 | ms_deform_attn_cpu_backward( 27 | const at::Tensor &value, 28 | const at::Tensor &spatial_shapes, 29 | const at::Tensor &level_start_index, 30 | const at::Tensor &sampling_loc, 31 | const at::Tensor &attn_weight, 32 | const at::Tensor &grad_output, 33 | const int im2col_step); 34 | 35 | } // namespace groundingdino 36 | --------------------------------------------------------------------------------
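The headers above fix the operator's calling convention; a sketch of invoking the compiled extension directly with dummy tensors whose shapes follow the Deformable-DETR convention this kernel assumes (CUDA only, per the dispatch in ms_deform_attn.h):

import torch
from groundingdino import _C  # compiled extension built by setup.py (WITH_CUDA)

N, n_heads, d_head, n_points, len_q = 1, 8, 32, 4, 100
shapes = torch.as_tensor([[32, 32], [16, 16]], dtype=torch.long, device="cuda")
start_idx = torch.cat((shapes.new_zeros(1), shapes.prod(1).cumsum(0)[:-1]))
len_in = int(shapes.prod(1).sum())  # 32*32 + 16*16 spatial tokens
n_levels = shapes.size(0)

value = torch.rand(N, len_in, n_heads, d_head, device="cuda")
sampling_loc = torch.rand(N, len_q, n_heads, n_levels, n_points, 2, device="cuda")
attn_weight = torch.rand(N, len_q, n_heads, n_levels, n_points, device="cuda")
attn_weight = attn_weight / attn_weight.sum(dim=(-1, -2), keepdim=True)  # normalize

out = _C.ms_deform_attn_forward(value, shapes, start_idx, sampling_loc, attn_weight, 64)
print(out.shape)  # (N, len_q, n_heads * d_head)

--------------------------------------------------------------------------------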
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | namespace groundingdino { 15 | 16 | at::Tensor ms_deform_attn_cuda_forward( 17 | const at::Tensor &value, 18 | const at::Tensor &spatial_shapes, 19 | const at::Tensor &level_start_index, 20 | const at::Tensor &sampling_loc, 21 | const at::Tensor &attn_weight, 22 | const int im2col_step); 23 | 24 | std::vector<at::Tensor> ms_deform_attn_cuda_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | 33 | } // namespace groundingdino -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include <cuda_runtime_api.h> 2 | 3 | namespace groundingdino { 4 | int get_cudart_version() { 5 | return CUDART_VERSION; 6 | } 7 | } // namespace groundingdino 8 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | #include "MsDeformAttn/ms_deform_attn.h" 4 | 5 | namespace groundingdino { 6 | 7 | #ifdef WITH_CUDA 8 | extern int get_cudart_version(); 9 | #endif 10 | 11 | std::string get_cuda_version() { 12 | #ifdef WITH_CUDA 13 | std::ostringstream oss; 14 | 15 | // copied from 16 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 17 | auto printCudaStyleVersion = [&](int v) { 18 | oss << (v / 1000) << "." << (v / 10 % 100); 19 | if (v % 10 != 0) { 20 | oss << "." << (v % 10); 21 | } 22 | }; 23 | printCudaStyleVersion(get_cudart_version()); 24 | return oss.str(); 25 | #else 26 | return std::string("not available"); 27 | #endif 28 | } 29 | 30 | // similar to 31 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 32 | std::string get_compiler_version() { 33 | std::ostringstream ss; 34 | #if defined(__GNUC__) 35 | #ifndef __clang__ 36 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 37 | #endif 38 | #endif 39 | 40 | #if defined(__clang_major__) 41 | { 42 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
43 | << __clang_patchlevel__; 44 | } 45 | #endif 46 | 47 | #if defined(_MSC_VER) 48 | { ss << "MSVC " << _MSC_FULL_VER; } 49 | #endif 50 | return ss.str(); 51 | } 52 | 53 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 54 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 55 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 56 | } 57 | 58 | } // namespace groundingdino -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/GroundingDINO/transformer_vanilla.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved 8 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 9 | """ 10 | DETR Transformer class. 11 | 12 | Copy-paste from torch.nn.Transformer with modifications: 13 | * positional encodings are passed in MHattention 14 | * extra LN at the end of encoder is removed 15 | * decoder returns a stack of activations from all decoding layers 16 | """ 17 | from typing import Optional 18 | 19 | import torch 20 | import torch.nn.functional as F 21 | from torch import Tensor, nn 22 | 23 | from .utils import ( 24 | MLP, 25 | _get_activation_fn, 26 | _get_clones, 27 | gen_encoder_output_proposals, 28 | gen_sineembed_for_position, 29 | sigmoid_focal_loss, 30 | ) 31 | 32 | 33 | class TextTransformer(nn.Module): 34 | def __init__(self, num_layers, d_model=256, nheads=8, dim_feedforward=2048, dropout=0.1): 35 | super().__init__() 36 | self.num_layers = num_layers 37 | self.d_model = d_model 38 | self.nheads = nheads 39 | self.dim_feedforward = dim_feedforward 40 | self.norm = None 41 | 42 | single_encoder_layer = TransformerEncoderLayer( 43 | d_model=d_model, nhead=nheads, dim_feedforward=dim_feedforward, dropout=dropout 44 | ) 45 | self.layers = _get_clones(single_encoder_layer, num_layers) 46 | 47 | def forward(self, memory_text: torch.Tensor, text_attention_mask: torch.Tensor): 48 | """ 49 | 50 | Args: 51 | text_attention_mask: bs, num_token 52 | memory_text: bs, num_token, d_model 53 | 54 | Returns: 55 | output: bs, num_token, d_model 56 | """ 57 | 58 | output = memory_text.transpose(0, 1) 59 | 60 | for layer in self.layers: 61 | output = layer(output, src_key_padding_mask=text_attention_mask) 62 | 63 | if self.norm is not None: 64 | output = self.norm(output) 65 | 66 | return output.transpose(0, 1) 67 | 68 | 69 | class TransformerEncoderLayer(nn.Module): 70 | def __init__( 71 | self, 72 | d_model, 73 | nhead, 74 | dim_feedforward=2048, 75 | dropout=0.1, 76 | activation="relu", 77 | normalize_before=False, 78 | ): 79 | super().__init__() 80 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 81 | # Implementation of Feedforward model 82 | self.linear1 = nn.Linear(d_model, dim_feedforward) 83 | self.dropout = nn.Dropout(dropout) 84 | self.linear2 = nn.Linear(dim_feedforward, d_model) 85 | 86 | self.norm1 = nn.LayerNorm(d_model) 87 | self.norm2 = 
nn.LayerNorm(d_model) 88 | self.dropout1 = nn.Dropout(dropout) 89 | self.dropout2 = nn.Dropout(dropout) 90 | 91 | self.activation = _get_activation_fn(activation) 92 | self.normalize_before = normalize_before 93 | self.nhead = nhead 94 | 95 | def with_pos_embed(self, tensor, pos: Optional[Tensor]): 96 | return tensor if pos is None else tensor + pos 97 | 98 | def forward( 99 | self, 100 | src, 101 | src_mask: Optional[Tensor] = None, 102 | src_key_padding_mask: Optional[Tensor] = None, 103 | pos: Optional[Tensor] = None, 104 | ): 105 | # repeat a per-sample 3-D attn mask once per head; guard against src_mask=None, which is what the TextTransformer call path passes 106 | if src_mask is not None and src_mask.dim() == 3 and src_mask.shape[0] == src.shape[1]: 107 | # bs, num_q, num_k 108 | src_mask = src_mask.repeat(self.nhead, 1, 1) 109 | 110 | q = k = self.with_pos_embed(src, pos) 111 | 112 | src2 = self.self_attn(q, k, value=src, attn_mask=src_mask)[0] 113 | 114 | # src2 = self.self_attn(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0] 115 | src = src + self.dropout1(src2) 116 | src = self.norm1(src) 117 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 118 | src = src + self.dropout2(src2) 119 | src = self.norm2(src) 120 | return src 121 | --------------------------------------------------------------------------------
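A small sketch exercising TextTransformer on dummy token features; shapes follow the docstring above, and note that with src_mask left as None the encoder layer applies no attention mask at all:

import torch
from groundingdino.models.GroundingDINO.transformer_vanilla import TextTransformer

model = TextTransformer(num_layers=2, d_model=256, nheads=8).eval()
memory_text = torch.rand(2, 16, 256)               # bs, num_token, d_model
text_mask = torch.zeros(2, 16, dtype=torch.bool)   # True would mark padding tokens
with torch.no_grad():
    out = model(memory_text, text_mask)
print(out.shape)  # torch.Size([2, 16, 256])

--------------------------------------------------------------------------------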
/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | from .GroundingDINO import build_groundingdino 9 | 10 | 11 | def build_model(args): 12 | # we use register to maintain models from catdet6 on. 13 | from .registry import MODULE_BUILD_FUNCS 14 | 15 | assert args.modelname in MODULE_BUILD_FUNCS._module_dict 16 | build_func = MODULE_BUILD_FUNCS.get(args.modelname) 17 | model = build_func(args) 18 | return model 19 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/models/registry.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 
5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # -*- coding: utf-8 -*- 8 | # @Author: Yihao Chen 9 | # @Date: 2021-08-16 16:03:17 10 | # @Last Modified by: Shilong Liu 11 | # @Last Modified time: 2022-01-23 15:26 12 | # modified from mmcv 13 | 14 | import inspect 15 | from functools import partial 16 | 17 | 18 | class Registry(object): 19 | def __init__(self, name): 20 | self._name = name 21 | self._module_dict = dict() 22 | 23 | def __repr__(self): 24 | format_str = self.__class__.__name__ + "(name={}, items={})".format( 25 | self._name, list(self._module_dict.keys()) 26 | ) 27 | return format_str 28 | 29 | def __len__(self): 30 | return len(self._module_dict) 31 | 32 | @property 33 | def name(self): 34 | return self._name 35 | 36 | @property 37 | def module_dict(self): 38 | return self._module_dict 39 | 40 | def get(self, key): 41 | return self._module_dict.get(key, None) 42 | 43 | def registe_with_name(self, module_name=None, force=False): 44 | return partial(self.register, module_name=module_name, force=force) 45 | 46 | def register(self, module_build_function, module_name=None, force=False): 47 | """Register a module build function. 48 | Args: 49 | module_build_function (callable): build function to be registered. 50 | """ 51 | if not inspect.isfunction(module_build_function): 52 | raise TypeError( 53 | "module_build_function must be a function, but got {}".format( 54 | type(module_build_function) 55 | ) 56 | ) 57 | if module_name is None: 58 | module_name = module_build_function.__name__ 59 | if not force and module_name in self._module_dict: 60 | raise KeyError("{} is already registered in {}".format(module_name, self.name)) 61 | self._module_dict[module_name] = module_build_function 62 | 63 | return module_build_function 64 | 65 | 66 | MODULE_BUILD_FUNCS = Registry("model build functions") 67 | --------------------------------------------------------------------------------
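A minimal sketch of how MODULE_BUILD_FUNCS is meant to be used; the build function below is hypothetical (in this repo the real registration happens in models/GroundingDINO/groundingdino.py), and note the upstream method name is spelled "registe_with_name":

import torch.nn as nn
from groundingdino.models.registry import MODULE_BUILD_FUNCS

@MODULE_BUILD_FUNCS.registe_with_name(module_name="toy_model")  # hypothetical name
def build_toy_model(args):
    # A build function receives the args namespace and returns an nn.Module.
    return nn.Linear(args.hidden_dim, args.hidden_dim)

build_func = MODULE_BUILD_FUNCS.get("toy_model")

--------------------------------------------------------------------------------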
All Rights Reserved 2 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/box_ops.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/box_ops.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/box_ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/box_ops.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/get_tokenlizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/get_tokenlizer.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/get_tokenlizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/get_tokenlizer.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/inference.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/inference.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/inference.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/inference.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/misc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/misc.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/slconfig.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/slconfig.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/slconfig.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/slconfig.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/visualizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/visualizer.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase 
II/GroundingDINO/groundingdino/util/__pycache__/visualizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/visualizer.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/vl_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/vl_utils.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/vl_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/__pycache__/vl_utils.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Utilities for bounding box manipulation and GIoU. 4 | """ 5 | import torch 6 | from torchvision.ops.boxes import box_area 7 | 8 | 9 | def box_cxcywh_to_xyxy(x): 10 | x_c, y_c, w, h = x.unbind(-1) 11 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)] 12 | return torch.stack(b, dim=-1) 13 | 14 | 15 | def box_xyxy_to_cxcywh(x): 16 | x0, y0, x1, y1 = x.unbind(-1) 17 | b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)] 18 | return torch.stack(b, dim=-1) 19 | 20 | 21 | # modified from torchvision to also return the union 22 | def box_iou(boxes1, boxes2): 23 | area1 = box_area(boxes1) 24 | area2 = box_area(boxes2) 25 | 26 | # import ipdb; ipdb.set_trace() 27 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 28 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 29 | 30 | wh = (rb - lt).clamp(min=0) # [N,M,2] 31 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 32 | 33 | union = area1[:, None] + area2 - inter 34 | 35 | iou = inter / (union + 1e-6) 36 | return iou, union 37 | 38 | 39 | def generalized_box_iou(boxes1, boxes2): 40 | """ 41 | Generalized IoU from https://giou.stanford.edu/ 42 | 43 | The boxes should be in [x0, y0, x1, y1] format 44 | 45 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 46 | and M = len(boxes2) 47 | """ 48 | # degenerate boxes gives inf / nan results 49 | # so do an early check 50 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 51 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 52 | # except: 53 | # import ipdb; ipdb.set_trace() 54 | iou, union = box_iou(boxes1, boxes2) 55 | 56 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 57 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 58 | 59 | wh = (rb - lt).clamp(min=0) # [N,M,2] 60 | area = wh[:, :, 0] * wh[:, :, 1] 61 | 62 | return iou - (area - union) / (area + 1e-6) 63 | 64 | 65 | # modified from torchvision to also return the union 66 | def box_iou_pairwise(boxes1, boxes2): 67 | 
area1 = box_area(boxes1) 68 | area2 = box_area(boxes2) 69 | 70 | lt = torch.max(boxes1[:, :2], boxes2[:, :2]) # [N,2] 71 | rb = torch.min(boxes1[:, 2:], boxes2[:, 2:]) # [N,2] 72 | 73 | wh = (rb - lt).clamp(min=0) # [N,2] 74 | inter = wh[:, 0] * wh[:, 1] # [N] 75 | 76 | union = area1 + area2 - inter 77 | 78 | iou = inter / union 79 | return iou, union 80 | 81 | 82 | def generalized_box_iou_pairwise(boxes1, boxes2): 83 | """ 84 | Generalized IoU from https://giou.stanford.edu/ 85 | 86 | Input: 87 | - boxes1, boxes2: N,4 88 | Output: 89 | - giou: N 90 | """ 91 | # degenerate boxes give inf / nan results 92 | # so do an early check 93 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 94 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 95 | assert boxes1.shape == boxes2.shape 96 | iou, union = box_iou_pairwise(boxes1, boxes2) # [N] 97 | 98 | lt = torch.min(boxes1[:, :2], boxes2[:, :2]) 99 | rb = torch.max(boxes1[:, 2:], boxes2[:, 2:]) 100 | 101 | wh = (rb - lt).clamp(min=0) # [N,2] 102 | area = wh[:, 0] * wh[:, 1] 103 | 104 | return iou - (area - union) / area 105 | 106 | 107 | def masks_to_boxes(masks): 108 | """Compute the bounding boxes around the provided masks 109 | 110 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 111 | 112 | Returns a [N, 4] tensor, with the boxes in xyxy format 113 | """ 114 | if masks.numel() == 0: 115 | return torch.zeros((0, 4), device=masks.device) 116 | 117 | h, w = masks.shape[-2:] 118 | 119 | y = torch.arange(0, h, dtype=torch.float) 120 | x = torch.arange(0, w, dtype=torch.float) 121 | y, x = torch.meshgrid(y, x) 122 | 123 | x_mask = masks * x.unsqueeze(0) 124 | x_max = x_mask.flatten(1).max(-1)[0] 125 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 126 | 127 | y_mask = masks * y.unsqueeze(0) 128 | y_max = y_mask.flatten(1).max(-1)[0] 129 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 130 | 131 | return torch.stack([x_min, y_min, x_max, y_max], 1) 132 | 133 | 134 | if __name__ == "__main__": 135 | x = torch.rand(5, 4) 136 | y = torch.rand(3, 4) 137 | iou, union = box_iou(x, y) 138 | import ipdb 139 | 140 | ipdb.set_trace() 141 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/get_tokenlizer.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer, BertModel, BertTokenizer, RobertaModel, RobertaTokenizerFast 2 | 3 | 4 | def get_tokenlizer(text_encoder_type): 5 | if not isinstance(text_encoder_type, str): 6 | # print("text_encoder_type is not a str") 7 | if hasattr(text_encoder_type, "text_encoder_type"): 8 | text_encoder_type = text_encoder_type.text_encoder_type 9 | elif text_encoder_type.get("text_encoder_type", False): 10 | text_encoder_type = text_encoder_type.get("text_encoder_type") 11 | else: 12 | raise ValueError( 13 | "Unknown type of text_encoder_type: {}".format(type(text_encoder_type)) 14 | ) 15 | print("final text_encoder_type: {}".format(text_encoder_type)) 16 | 17 | tokenizer = AutoTokenizer.from_pretrained(text_encoder_type) 18 | return tokenizer 19 | 20 | 21 | def get_pretrained_language_model(text_encoder_type): 22 | if text_encoder_type == "bert-base-uncased": 23 | return BertModel.from_pretrained(text_encoder_type) 24 | if text_encoder_type == "roberta-base": 25 | return RobertaModel.from_pretrained(text_encoder_type) 26 | raise ValueError("Unknown
text_encoder_type {}".format(text_encoder_type)) 27 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import functools 3 | import logging 4 | import os 5 | import sys 6 | 7 | from termcolor import colored 8 | 9 | 10 | class _ColorfulFormatter(logging.Formatter): 11 | def __init__(self, *args, **kwargs): 12 | self._root_name = kwargs.pop("root_name") + "." 13 | self._abbrev_name = kwargs.pop("abbrev_name", "") 14 | if len(self._abbrev_name): 15 | self._abbrev_name = self._abbrev_name + "." 16 | super(_ColorfulFormatter, self).__init__(*args, **kwargs) 17 | 18 | def formatMessage(self, record): 19 | record.name = record.name.replace(self._root_name, self._abbrev_name) 20 | log = super(_ColorfulFormatter, self).formatMessage(record) 21 | if record.levelno == logging.WARNING: 22 | prefix = colored("WARNING", "red", attrs=["blink"]) 23 | elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: 24 | prefix = colored("ERROR", "red", attrs=["blink", "underline"]) 25 | else: 26 | return log 27 | return prefix + " " + log 28 | 29 | 30 | # so that calling setup_logger multiple times won't add many handlers 31 | @functools.lru_cache() 32 | def setup_logger(output=None, distributed_rank=0, *, color=True, name="imagenet", abbrev_name=None): 33 | """ 34 | Initialize the detectron2 logger and set its verbosity level to "INFO". 35 | 36 | Args: 37 | output (str): a file name or a directory to save log. If None, will not save log file. 38 | If ends with ".txt" or ".log", assumed to be a file name. 39 | Otherwise, logs will be saved to `output/log.txt`. 40 | name (str): the root module name of this logger 41 | 42 | Returns: 43 | logging.Logger: a logger 44 | """ 45 | logger = logging.getLogger(name) 46 | logger.setLevel(logging.DEBUG) 47 | logger.propagate = False 48 | 49 | if abbrev_name is None: 50 | abbrev_name = name 51 | 52 | plain_formatter = logging.Formatter( 53 | "[%(asctime)s.%(msecs)03d]: %(message)s", datefmt="%m/%d %H:%M:%S" 54 | ) 55 | # stdout logging: master only 56 | if distributed_rank == 0: 57 | ch = logging.StreamHandler(stream=sys.stdout) 58 | ch.setLevel(logging.DEBUG) 59 | if color: 60 | formatter = _ColorfulFormatter( 61 | colored("[%(asctime)s.%(msecs)03d]: ", "green") + "%(message)s", 62 | datefmt="%m/%d %H:%M:%S", 63 | root_name=name, 64 | abbrev_name=str(abbrev_name), 65 | ) 66 | else: 67 | formatter = plain_formatter 68 | ch.setFormatter(formatter) 69 | logger.addHandler(ch) 70 | 71 | # file logging: all workers 72 | if output is not None: 73 | if output.endswith(".txt") or output.endswith(".log"): 74 | filename = output 75 | else: 76 | filename = os.path.join(output, "log.txt") 77 | if distributed_rank > 0: 78 | filename = filename + f".rank{distributed_rank}" 79 | os.makedirs(os.path.dirname(filename), exist_ok=True) 80 | 81 | fh = logging.StreamHandler(_cached_log_stream(filename)) 82 | fh.setLevel(logging.DEBUG) 83 | fh.setFormatter(plain_formatter) 84 | logger.addHandler(fh) 85 | 86 | return logger 87 | 88 | 89 | # cache the opened file object, so that different calls to `setup_logger` 90 | # with the same file name can safely write to the same file. 
91 | @functools.lru_cache(maxsize=None) 92 | def _cached_log_stream(filename): 93 | return open(filename, "a") 94 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/time_counter.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | 4 | 5 | class TimeCounter: 6 | def __init__(self) -> None: 7 | self.clear()  # ensure timedict and basetime exist before the first timeit() 8 | 9 | def clear(self): 10 | self.timedict = {} 11 | self.basetime = time.perf_counter() 12 | 13 | def timeit(self, name): 14 | nowtime = time.perf_counter() - self.basetime 15 | self.timedict[name] = nowtime 16 | self.basetime = time.perf_counter() 17 | 18 | 19 | class TimeHolder: 20 | def __init__(self) -> None: 21 | self.timedict = {} 22 | 23 | def update(self, _timedict: dict): 24 | for k, v in _timedict.items(): 25 | if k not in self.timedict: 26 | self.timedict[k] = AverageMeter(name=k, val_only=True) 27 | self.timedict[k].update(val=v) 28 | 29 | def final_res(self): 30 | return {k: v.avg for k, v in self.timedict.items()} 31 | 32 | def __str__(self): 33 | return json.dumps(self.final_res(), indent=2) 34 | 35 | 36 | class AverageMeter(object): 37 | """Computes and stores the average and current value""" 38 | 39 | def __init__(self, name, fmt=":f", val_only=False): 40 | self.name = name 41 | self.fmt = fmt 42 | self.val_only = val_only 43 | self.reset() 44 | 45 | def reset(self): 46 | self.val = 0 47 | self.avg = 0 48 | self.sum = 0 49 | self.count = 0 50 | 51 | def update(self, val, n=1): 52 | self.val = val 53 | self.sum += val * n 54 | self.count += n 55 | self.avg = self.sum / self.count 56 | 57 | def __str__(self): 58 | if self.val_only: 59 | fmtstr = "{name} {val" + self.fmt + "}" 60 | else: 61 | fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" 62 | return fmtstr.format(**self.__dict__) 63 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/util/vl_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from typing import List 4 | 5 | import torch 6 | 7 | 8 | def create_positive_map_from_span(tokenized, token_span, max_text_len=256): 9 | """construct a map such that positive_map[i,j] = True iff box i is associated to token j 10 | Input: 11 | - tokenized: 12 | - input_ids: Tensor[1, ntokens] 13 | - attention_mask: Tensor[1, ntokens] 14 | - token_span: list with length num_boxes.
15 | - each item: [start_idx, end_idx] 16 | """ 17 | positive_map = torch.zeros((len(token_span), max_text_len), dtype=torch.float) 18 | for j, tok_list in enumerate(token_span): 19 | for (beg, end) in tok_list: 20 | beg_pos = tokenized.char_to_token(beg) 21 | end_pos = tokenized.char_to_token(end - 1) 22 | if beg_pos is None: 23 | try: 24 | beg_pos = tokenized.char_to_token(beg + 1) 25 | if beg_pos is None: 26 | beg_pos = tokenized.char_to_token(beg + 2) 27 | except Exception: 28 | beg_pos = None 29 | if end_pos is None: 30 | try: 31 | end_pos = tokenized.char_to_token(end - 2) 32 | if end_pos is None: 33 | end_pos = tokenized.char_to_token(end - 3) 34 | except Exception: 35 | end_pos = None 36 | if beg_pos is None or end_pos is None: 37 | continue 38 | 39 | assert beg_pos is not None and end_pos is not None 40 | if os.environ.get("SHILONG_DEBUG_ONLY_ONE_POS", None) == "TRUE": 41 | positive_map[j, beg_pos] = 1 42 | break 43 | else: 44 | positive_map[j, beg_pos : end_pos + 1].fill_(1) 45 | 46 | return positive_map / (positive_map.sum(-1)[:, None] + 1e-6) 47 | 48 | 49 | def build_captions_and_token_span(cat_list, force_lowercase): 50 | """ 51 | Return: 52 | captions: str 53 | cat2tokenspan: dict 54 | { 55 | 'dog': [[0, 2]], 56 | ... 57 | } 58 | """ 59 | 60 | cat2tokenspan = {} 61 | captions = "" 62 | for catname in cat_list: 63 | class_name = catname 64 | if force_lowercase: 65 | class_name = class_name.lower() 66 | if "/" in class_name: 67 | class_name_list: List = class_name.strip().split("/") 68 | class_name_list.append(class_name) 69 | class_name: str = random.choice(class_name_list) 70 | 71 | tokens_positive_i = [] 72 | subnamelist = [i.strip() for i in class_name.strip().split(" ")] 73 | for subname in subnamelist: 74 | if len(subname) == 0: 75 | continue 76 | if len(captions) > 0: 77 | captions = captions + " " 78 | start_idx = len(captions) 79 | end_idx = start_idx + len(subname) 80 | tokens_positive_i.append([start_idx, end_idx]) 81 | captions = captions + subname 82 | 83 | if len(tokens_positive_i) > 0: 84 | captions = captions + " ." 85 | cat2tokenspan[class_name] = tokens_positive_i 86 | 87 | return captions, cat2tokenspan 88 | 89 | 90 | def build_id2posspan_and_caption(category_dict: list): 91 | """Build id2pos_span and caption from category_dict 92 | 93 | Args: 94 | category_dict: list of COCO-style category dicts with 'id' and 'name' keys 95 | """ 96 | cat_list = [item["name"].lower() for item in category_dict] 97 | id2catname = {item["id"]: item["name"].lower() for item in category_dict} 98 | caption, cat2posspan = build_captions_and_token_span(cat_list, force_lowercase=True) 99 | id2posspan = {catid: cat2posspan[catname] for catid, catname in id2catname.items()} 100 | return id2posspan, caption 101 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/GroundingDINO/groundingdino/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0' 2 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
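# A minimal usage sketch of this package's two entry points (the checkpoint
# filename and the `image`/`box` arrays are illustrative, not shipped here):
#
#   from segment_anything import sam_model_registry, SamPredictor
#
#   sam = sam_model_registry["vit_h"](checkpoint="sam_vit_h.pth")
#   predictor = SamPredictor(sam)
#   predictor.set_image(image)                       # HxWxC uint8 RGB array
#   masks, scores, _ = predictor.predict(box=box)    # box as xyxy pixel coords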
6 | 7 | from .build_sam import ( 8 | build_sam, 9 | build_sam_vit_h, 10 | build_sam_vit_l, 11 | build_sam_vit_b, 12 | sam_model_registry, 13 | ) 14 | from .predictor import SamPredictor 15 | from .automatic_mask_generator import SamAutomaticMaskGenerator 16 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/automatic_mask_generator.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/automatic_mask_generator.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/automatic_mask_generator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/automatic_mask_generator.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/build_sam.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/build_sam.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/build_sam.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/build_sam.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/predictor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase 
II/segment_anything_/segment_anything/__pycache__/predictor.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/predictor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/__pycache__/predictor.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/build_sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam = build_sam_vit_h 25 | 26 | 27 | def build_sam_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_model_registry = { 48 | "default": build_sam, 49 | "vit_h": build_sam, 50 | "vit_l": build_sam_vit_l, 51 | "vit_b": build_sam_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoder( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | ), 99 | pixel_mean=[123.675, 116.28, 103.53], 100 | pixel_std=[58.395, 57.12, 57.375], 101 | ) 102 | sam.eval() 103 | if checkpoint is not None: 104 | with open(checkpoint, "rb") as f: 105 | state_dict = torch.load(f) 106 | 
sam.load_state_dict(state_dict) 107 | return sam 108 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .sam import Sam 8 | from .image_encoder import ImageEncoderViT 9 | from .mask_decoder import MaskDecoder 10 | from .prompt_encoder import PromptEncoder 11 | from .transformer import TwoWayTransformer 12 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/common.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/common.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/common.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/common.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/image_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/image_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/image_encoder.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/image_encoder.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/mask_decoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/mask_decoder.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/mask_decoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/mask_decoder.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/prompt_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/prompt_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/prompt_encoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/prompt_encoder.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/sam.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/sam.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/sam.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/sam.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase 
II/segment_anything_/segment_anything/modeling/__pycache__/transformer.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/__pycache__/transformer.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x 44 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/amg.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/amg.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/amg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/amg.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/transforms.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/transforms.cpython-37.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/transforms.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/__pycache__/transforms.cpython-38.pyc -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
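# A short usage sketch for the ResizeLongestSide transform defined below
# (the `image` and `boxes` arrays are illustrative):
#
#   transform = ResizeLongestSide(target_length=1024)
#   resized = transform.apply_image(image)                   # HxWxC uint8
#   boxes_t = transform.apply_boxes(boxes, image.shape[:2])  # Bx4 xyxy
#
# Coordinates are rescaled by the same per-axis factors as the image, so
# boxes and points stay aligned with the resized pixels.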
6 | 7 | import numpy as np 8 | import torch 9 | from torch.nn import functional as F 10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore 11 | 12 | from copy import deepcopy 13 | from typing import Tuple 14 | 15 | 16 | class ResizeLongestSide: 17 | """ 18 | Resizes images to longest side 'target_length', as well as provides 19 | methods for resizing coordinates and boxes. Provides methods for 20 | transforming both numpy array and batched torch tensors. 21 | """ 22 | 23 | def __init__(self, target_length: int) -> None: 24 | self.target_length = target_length 25 | 26 | def apply_image(self, image: np.ndarray) -> np.ndarray: 27 | """ 28 | Expects a numpy array with shape HxWxC in uint8 format. 29 | """ 30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 31 | return np.array(resize(to_pil_image(image), target_size)) 32 | 33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 34 | """ 35 | Expects a numpy array of length 2 in the final dimension. Requires the 36 | original image size in (H, W) format. 37 | """ 38 | old_h, old_w = original_size 39 | new_h, new_w = self.get_preprocess_shape( 40 | original_size[0], original_size[1], self.target_length 41 | ) 42 | coords = deepcopy(coords).astype(float) 43 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 44 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 45 | return coords 46 | 47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 48 | """ 49 | Expects a numpy array shape Bx4. Requires the original image size 50 | in (H, W) format. 51 | """ 52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) 53 | return boxes.reshape(-1, 4) 54 | 55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: 56 | """ 57 | Expects batched images with shape BxCxHxW and float format. This 58 | transformation may not exactly match apply_image. apply_image is 59 | the transformation expected by the model. 60 | """ 61 | # Expects an image in BCHW format. May not exactly match apply_image. 62 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 63 | return F.interpolate( 64 | image, target_size, mode="bilinear", align_corners=False, antialias=True 65 | ) 66 | 67 | def apply_coords_torch( 68 | self, coords: torch.Tensor, original_size: Tuple[int, ...] 69 | ) -> torch.Tensor: 70 | """ 71 | Expects a torch tensor with length 2 in the last dimension. Requires the 72 | original image size in (H, W) format. 73 | """ 74 | old_h, old_w = original_size 75 | new_h, new_w = self.get_preprocess_shape( 76 | original_size[0], original_size[1], self.target_length 77 | ) 78 | coords = deepcopy(coords).to(torch.float) 79 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 80 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 81 | return coords 82 | 83 | def apply_boxes_torch( 84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...] 85 | ) -> torch.Tensor: 86 | """ 87 | Expects a torch tensor with shape Bx4. Requires the original image 88 | size in (H, W) format. 89 | """ 90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) 91 | return boxes.reshape(-1, 4) 92 | 93 | @staticmethod 94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: 95 | """ 96 | Compute the output size given input size and target long side length. 
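For example, oldh=480, oldw=640 with long_side_length=1024 gives
scale=1.6, hence an output of (newh, neww) = (768, 1024).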
97 | """ 98 | scale = long_side_length * 1.0 / max(oldh, oldw) 99 | newh, neww = oldh * scale, oldw * scale 100 | neww = int(neww + 0.5) 101 | newh = int(newh + 0.5) 102 | return (newh, neww) 103 | -------------------------------------------------------------------------------- /Annotation_Pipeline/Phase II/segment_anything_/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import find_packages, setup 8 | 9 | setup( 10 | name="segment_anything", 11 | version="1.0", 12 | install_requires=[], 13 | packages=find_packages(exclude="notebooks"), 14 | extras_require={ 15 | "all": ["matplotlib", "pycocotools", "opencv-python", "onnx", "onnxruntime"], 16 | "dev": ["flake8", "isort", "black", "mypy"], 17 | }, 18 | ) 19 | -------------------------------------------------------------------------------- /Method/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/.DS_Store -------------------------------------------------------------------------------- /Method/__pycache__/openai.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/openai.cpython-38.pyc -------------------------------------------------------------------------------- /Method/__pycache__/optim.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/optim.cpython-37.pyc -------------------------------------------------------------------------------- /Method/__pycache__/optim.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/optim.cpython-38.pyc -------------------------------------------------------------------------------- /Method/__pycache__/scheduler.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/scheduler.cpython-37.pyc -------------------------------------------------------------------------------- /Method/__pycache__/scheduler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/__pycache__/scheduler.cpython-38.pyc -------------------------------------------------------------------------------- /Method/accelerators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/accelerators/__init__.py -------------------------------------------------------------------------------- /Method/accelerators/__pycache__/__init__.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/accelerators/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Method/accelerators/__pycache__/accelerator.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/accelerators/__pycache__/accelerator.cpython-37.pyc -------------------------------------------------------------------------------- /Method/accelerators/__pycache__/apex_ddp_accelerator.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/accelerators/__pycache__/apex_ddp_accelerator.cpython-37.pyc -------------------------------------------------------------------------------- /Method/accelerators/accelerator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276) 3 | # Github: https://github.com/zengyan-97/X-VLM 4 | # Copyright (c) 2022, ByteDance Inc. 5 | # All rights reserved. 6 | 7 | from logging import Logger 8 | 9 | import torch 10 | from torch.optim import Optimizer 11 | 12 | Net = torch.nn.Module 13 | 14 | 15 | class Accelerator: 16 | def __init__(self, cfg, logger) -> None: 17 | self.cfg = cfg 18 | self.logger = logger 19 | 20 | def set_up(self, model: Net): 21 | raise NotImplementedError("Set Up method not implement in Accelerator, please check! ") 22 | 23 | def broadcast(self): 24 | raise NotImplementedError("Broadcast method not implement in Accelerator, please check! ") 25 | 26 | def backward_step(self, loss: torch.Tensor): 27 | loss.backward() 28 | 29 | def optimizer_step(self, optimizer: Optimizer, model: Net, grad_norm: float) -> float: 30 | total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 31 | grad_norm) 32 | return float(total_norm) 33 | -------------------------------------------------------------------------------- /Method/accelerators/apex_ddp_accelerator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276) 3 | # Github: https://github.com/zengyan-97/X-VLM 4 | # Copyright (c) 2022, ByteDance Inc. 5 | # All rights reserved. 6 | 7 | import os 8 | import random 9 | import sys 10 | from typing import Tuple, Union, Optional, Any 11 | import numpy as np 12 | 13 | import torch 14 | import torch.distributed as distributed 15 | from torch.optim import Optimizer 16 | from torch.optim.lr_scheduler import LambdaLR 17 | 18 | Net = torch.nn.Module 19 | 20 | from .accelerator import Accelerator 21 | 22 | # try: 23 | from apex import amp 24 | from apex.parallel import DistributedDataParallel as Apex_DDP 25 | from apex.parallel import convert_syncbn_model 26 | # except ImportError: 27 | # print('no apex! 
Please install from https://www.github.com/nvidia/apex') 28 | 29 | 30 | class ApexDDPAccelerator(Accelerator): 31 | """ 32 | ApexDDPAccelerator, use apex DistributedDataParallel 33 | """ 34 | 35 | def __init__(self, cfg, logger): 36 | super().__init__(cfg, logger) 37 | self.accelerator_rng_seed = self.cfg.RNG_SEED 38 | self.accelerator_syncbn = self.cfg.SYNCBN 39 | self.accelerator_fp16_opt_level = self.cfg.FP16_OPT_LEVEL 40 | self.accelerator_fp16_loss_scale = self.cfg.FP16_LOSS_SCALE 41 | 42 | def set_up(self, model: Net, optimizer: Optimizer, lr_scheduler: LambdaLR, 43 | local_rank: int, world_size: int, rank: int) -> Tuple[Apex_DDP, Optimizer, LambdaLR]: 44 | """ 45 | set up ApexDDPAccelerator, including process_group and apex_ddp 46 | """ 47 | torch.backends.cudnn.benchmark = False 48 | random.seed(self.accelerator_rng_seed) 49 | np.random.seed(self.accelerator_rng_seed) 50 | torch.random.manual_seed(self.accelerator_rng_seed) 51 | torch.cuda.manual_seed_all(self.accelerator_rng_seed) 52 | master_address = os.environ.get('MASTER_ADDR', "127.0.0.1") 53 | master_port = int(os.environ.get('MASTER_PORT', 34171)) 54 | 55 | torch.cuda.set_device(local_rank) 56 | model = model.cuda() 57 | if not torch.distributed.is_initialized(): 58 | distributed.init_process_group( 59 | backend='nccl', 60 | init_method='tcp://{}:{}'.format(master_address, master_port), 61 | world_size=world_size, 62 | rank=rank, 63 | group_name='mtorch') 64 | print( 65 | f'ApexDDPAccelerator distributed, size: {world_size}, rank: {rank}, local rank: {local_rank}') 66 | sys.stdout.flush() 67 | 68 | self.broadcast(model) 69 | apex_model, optimizer = self.configure_ddp(model, optimizer) 70 | 71 | if self.accelerator_syncbn: 72 | apex_model = self.configure_sync_batchnorm(apex_model) 73 | return apex_model, optimizer, lr_scheduler 74 | 75 | def broadcast(self, model: Net, src=0) -> None: 76 | for v in model.state_dict().values(): 77 | distributed.broadcast(v, src) 78 | 79 | def configure_ddp(self, model: Net, optimizer: Optimizer) -> Tuple[Apex_DDP, Optimizer]: 80 | model, optimizer = amp.initialize(model, optimizer, 81 | opt_level=self.accelerator_fp16_opt_level, 82 | keep_batchnorm_fp32=None, # from True to None 83 | loss_scale=self.accelerator_fp16_loss_scale, 84 | max_loss_scale=1024.0, 85 | min_loss_scale=1.0) 86 | 87 | apex_model = Apex_DDP(model, delay_allreduce=True) 88 | self.ddp_model = apex_model 89 | return apex_model, optimizer 90 | 91 | def configure_sync_batchnorm(self, model: Net) -> Net: 92 | model = convert_syncbn_model(model) 93 | return model 94 | 95 | def backward_step(self, loss: torch.Tensor, optimizer: Optimizer): 96 | with amp.scale_loss(loss, optimizer) as scaled_loss: 97 | scaled_loss.backward() 98 | 99 | def optimizer_step(self, optimizer: Optimizer, model: Net, grad_norm: float) -> float: 100 | total_norm = torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 101 | grad_norm) 102 | return float(total_norm) 103 | -------------------------------------------------------------------------------- /Method/config.yaml: -------------------------------------------------------------------------------- 1 | accelerator: {CLIP_GRAD_NORM: 1.0, FP16_LOSS_SCALE: dynamic, FP16_OPT_LEVEL: O1, GRAD_ACCUMULATE_STEPS: 1, 2 | RNG_SEED: 42, SYNCBN: false} 3 | calc_image_bbox_loss: false 4 | ckpt_frequent: 5 5 | ckpt_frequent_step: 50000 6 | embed_dim: 256 7 | image_res: 224 8 | images: {batch_size: 128, caption_key: caption, image_key: binary, is_image_rpath: false, 9 | num_workers: 4, tokenized: false} 10 | 
load_bertL_by_sep: false 11 | mask_prob: 0.25 12 | mask_whole_word: true 13 | max_masks: 8 14 | max_tokens: 40 15 | max_words: 40 16 | optimizer: {lr: 0.0001, lr_mult: 2, opt: adamW, weight_decay: 0.01} 17 | patch_size: 32 18 | regions: {batch_size: 128, caption_key: caption, image_key: binary, is_image_rpath: false, 19 | iter_perc: 0.5, max_images: 48, max_regions: 5, min_perc_in_image: 0.5, num_workers: 4, 20 | tokenized: false} 21 | schedular: {epochs: 41, lr: 0.0001, num_warmup_steps: 2500, sched: linear} 22 | skipgram_prb: 0.2 23 | skipgram_size: 3 24 | temp: 0.07 25 | text_config: configs/config_bert.json 26 | text_encoder: /storage_fast/mchu/blip2/VLM/X-VLM/data/bert 27 | train_dataset_size: 5114489 28 | train_file: [hdfs://path/to/coco, hdfs://path/to/vg, hdfs://path/to/sbu, hdfs://path/to/cc3m] 29 | train_file_regions: [hdfs://path/to/coco_objs, hdfs://path/to/vg_objs, hdfs://path/to/vg_regions] 30 | use_clip_vit: false 31 | use_roberta: false 32 | use_swin: true 33 | vision_config: configs/config_swinB_224.json 34 | -------------------------------------------------------------------------------- /Method/configs/config_bert.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30522, 19 | "fusion_layer": 6, 20 | "encoder_width": 1024 21 | } 22 | -------------------------------------------------------------------------------- /Method/configs/config_clipvitB.json: -------------------------------------------------------------------------------- 1 | { 2 | "ckpt": "data/clip-vit-base-patch16.bin", 3 | "vision_width": 768, 4 | "patch_size": 16, 5 | "hidden_act": "quick_gelu", 6 | "num_attention_heads": 12, 7 | "attention_dropout": 0.0, 8 | "intermediate_size": 3072, 9 | "num_hidden_layers": 12, 10 | "local_attn_depth": 4 11 | } 12 | -------------------------------------------------------------------------------- /Method/configs/config_roberta.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "RobertaForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "bos_token_id": 0, 7 | "eos_token_id": 2, 8 | "hidden_act": "gelu", 9 | "hidden_dropout_prob": 0.1, 10 | "hidden_size": 768, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 3072, 13 | "layer_norm_eps": 1e-05, 14 | "max_position_embeddings": 514, 15 | "model_type": "roberta", 16 | "num_attention_heads": 12, 17 | "num_hidden_layers": 12, 18 | "pad_token_id": 1, 19 | "type_vocab_size": 1, 20 | "vocab_size": 50265, 21 | "fusion_layer": 6, 22 | "encoder_width": 1024 23 | } 24 | -------------------------------------------------------------------------------- /Method/configs/config_swinB_224.json: -------------------------------------------------------------------------------- 1 | { 2 | "ckpt": "/storage_fast/mchu/Multi-model/VLM/X-VLM/data/swin_base_patch4_window7_224_22k.pth", 3 | "vision_width": 1024, 4 | "image_res": 224, 5 | "window_size": 7, 6 | "embed_dim": 128, 7 | "depths": [ 2, 2, 18, 2 ], 8 | "num_heads": [ 4, 8, 16, 32 ] 9 | } 10 | 
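The config_swinB_224.json above and the 384/480 variants that follow differ only in ckpt, image_res, and window_size: Swin-B downsamples by a factor of 32 overall (4x patch embedding, then three 2x patch merges), so window_size is chosen as image_res / 32 and a single attention window tiles the final feature map exactly (224 -> 7, 384 -> 12, 480 -> 15), while vision_width = embed_dim * 2**3 = 1024. A quick sanity-check sketch of that relationship -- not part of the repo, and it assumes it is run from the Method/ directory so the configs/ paths resolve:

import json

def summarize_swin_config(path):
    cfg = json.load(open(path, 'r'))
    stride = 32                                   # 4x patch embed * 2**3 from the three merge stages
    grid = cfg['image_res'] // stride             # side length of the final token grid
    assert grid == cfg['window_size'], "window_size should tile the last stage exactly"
    print(f"{path}: {grid}x{grid} final tokens, width {cfg['vision_width']}")

summarize_swin_config('configs/config_swinB_224.json')   # 7x7 tokens, width 1024

The same stride is why the GeoText configs pair these JSONs with patch_size: 32.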
-------------------------------------------------------------------------------- /Method/configs/config_swinB_384.json: -------------------------------------------------------------------------------- 1 | { 2 | "ckpt": "/root/GeoText-1652/GeoText1652_model/swin_base_patch4_window7_224_22k.pth", 3 | "vision_width": 1024, 4 | "image_res": 384, 5 | "window_size": 12, 6 | "embed_dim": 128, 7 | "depths": [ 2, 2, 18, 2 ], 8 | "num_heads": [ 4, 8, 16, 32 ] 9 | } 10 | -------------------------------------------------------------------------------- /Method/configs/config_swinB_480.json: -------------------------------------------------------------------------------- 1 | { 2 | "ckpt": "/storage_fast/mchu/Multi-model/VLM/X-VLM/data/swin_base_patch4_window7_224_22k.pth", 3 | "vision_width": 1024, 4 | "image_res": 480, 5 | "window_size": 15, 6 | "embed_dim": 128, 7 | "depths": [ 2, 2, 18, 2 ], 8 | "num_heads": [ 4, 8, 16, 32 ] 9 | } 10 | -------------------------------------------------------------------------------- /Method/configs/re_bbox.yaml: -------------------------------------------------------------------------------- 1 | train_file: ["/root/GeoText-1652/GeoText1652_Dataset/train.json"] 2 | test_file: "/root/GeoText-1652/GeoText1652_Dataset/test_951_version.json" 3 | image_root: '/root/GeoText-1652/GeoText1652_Dataset/images' 4 | 5 | ## Vision Encoder 6 | vision_config: 'configs/config_swinB_384.json' 7 | 8 | use_clip_vit: False 9 | #image_res: 384 10 | #patch_size: 16 11 | 12 | use_swin: True 13 | image_res: 384 14 | patch_size: 32 15 | 16 | 17 | ## Text Encoder 18 | use_roberta: False 19 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 20 | text_encoder: '/root/GeoText-1652/GeoText1652_model/bert' # ['data/bert-base-uncased', 'data/roberta-base'] 21 | 22 | 23 | 24 | ## Training 25 | batch_size_train: 24 26 | batch_size_test: 1 27 | batch_size_test_text: 512 28 | max_tokens: 50 29 | embed_dim: 256 30 | temp: 0.07 31 | k_test: 256 32 | 33 | 34 | ## Other Settings 35 | # optimizer: {opt: adamW, lr: 3e-6, weight_decay: 0.001, lr_mult: 2} 36 | # schedular: {sched: linear, lr: 3e-6, epochs: 3, num_warmup_steps: 0.1} 37 | 38 | 39 | optimizer: {opt: adamW, lr: 3e-5, weight_decay: 0.01, lr_mult: 2} 40 | schedular: {sched: linear, lr: 3e-5, epochs: 1, num_warmup_steps: 0.1} 41 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/Grounding_bbox.yaml: -------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/refcoco+_vlue_test.json'] 2 | image_root: 'images/marvl/' 3 | vlue_test: True 4 | 5 | 6 | ## Vision Encoder 7 | vision_config: 'configs/config_swinB_384.json' 8 | 9 | use_clip_vit: False 10 | #image_res: 384 11 | #patch_size: 16 12 | 13 | use_swin: True 14 | image_res: 384 15 | patch_size: 32 16 | 17 | 18 | ## Text Encoder 19 | use_roberta: False 20 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 21 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 22 | 23 | 24 | ## Training 25 | batch_size: 20 26 | max_tokens: 40 27 | careful_hflip: True # first check whether 'left' or 'right' in captions 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/Grounding_weakly.yaml: 
-------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/refcoco+_bbox100_vlue_test.json'] 2 | image_root: 'images/marvl/' 3 | vlue_test: True 4 | 5 | 6 | ## Vision Encoder 7 | vision_config: 'configs/config_swinB_384.json' 8 | 9 | use_clip_vit: False 10 | #image_res: 384 11 | #patch_size: 16 12 | 13 | use_swin: True 14 | image_res: 384 15 | patch_size: 32 16 | 17 | 18 | ## Text Encoder 19 | use_roberta: False 20 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 21 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 22 | 23 | 24 | ## Training 25 | batch_size: 20 26 | block_num: 9 # i.e. the layer to calculate cross-attn; adjust it to get best performance 27 | max_tokens: 40 28 | embed_dim: 256 29 | temp: 0.07 30 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/NLVR.yaml: -------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/nlvr2_vlue_test.json'] 2 | image_root: 'images/marvl/' 3 | 4 | ## Vision Encoder 5 | vision_config: 'configs/config_swinB_384.json' 6 | 7 | use_clip_vit: False 8 | #image_res: 384 9 | #patch_size: 16 10 | 11 | use_swin: True 12 | image_res: 384 13 | patch_size: 32 14 | 15 | 16 | ## Text Encoder 17 | use_roberta: False 18 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 19 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 20 | 21 | 22 | ## Training 23 | batch_size: 20 # 1 24 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/Retrieval.yaml: -------------------------------------------------------------------------------- 1 | test_file: 'data/vlue_released/itr_vlue_test.json' 2 | image_root: 'images/marvl/' 3 | 4 | ## Vision Encoder 5 | vision_config: 'configs/config_swinB_384.json' 6 | 7 | use_clip_vit: False 8 | #image_res: 384 9 | #patch_size: 16 10 | 11 | use_swin: True 12 | image_res: 384 13 | patch_size: 32 14 | 15 | 16 | ## Text Encoder 17 | use_roberta: False 18 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 19 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 20 | 21 | 22 | ## Training 23 | batch_size_test: 64 # 1 24 | batch_size_test_text: 64 # 1 25 | max_tokens: 40 26 | embed_dim: 256 27 | temp: 0.07 28 | k_test: 256 29 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/VQA.yaml: -------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/vqa_vlue_test.json'] 2 | vqa_root: 'images/marvl/' 3 | vg_root: 'images/visualgenome/' 4 | answer_list: 'data/finetune/answer_list.json' 5 | 6 | 7 | ## Vision Encoder 8 | vision_config: 'configs/config_swinB_384.json' 9 | 10 | use_clip_vit: False 11 | #image_res: 384 12 | #patch_size: 16 13 | 14 | use_swin: True 15 | image_res: 384 16 | patch_size: 32 17 | 18 | ## Text Encoder 19 | use_roberta: False 20 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 21 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 22 | 23 | 24 | ## Training 25 | num_dec_layers: 6 26 | batch_size_test: 32 27 | 
max_tokens: 40 28 | k_test: 128 29 | 30 | 31 | -------------------------------------------------------------------------------- /Method/configs/vlue-base-test/VQA_480.yaml: -------------------------------------------------------------------------------- 1 | test_file: ['data/vlue_released/vqa_vlue_test.json'] 2 | vqa_root: 'images/marvl/' 3 | vg_root: 'images/visualgenome/' 4 | answer_list: 'data/finetune/answer_list.json' 5 | 6 | ## Vision Encoder 7 | vision_config: 'configs/config_swinB_480.json' 8 | 9 | use_clip_vit: False 10 | #image_res: 480 11 | #patch_size: 16 12 | 13 | use_swin: True 14 | image_res: 480 15 | patch_size: 32 16 | 17 | ## Text Encoder 18 | use_roberta: False 19 | text_config: 'configs/config_bert.json' # ['configs/config_bert.json', 'configs/config_roberta.json'] 20 | text_encoder: 'data/bert-base-uncased' # ['data/bert-base-uncased', 'data/roberta-base'] 21 | 22 | 23 | ## Training 24 | num_dec_layers: 6 25 | batch_size_test: 32 26 | max_tokens: 40 27 | k_test: 128 28 | 29 | -------------------------------------------------------------------------------- /Method/dataset/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/coco_karpathy_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/coco_karpathy_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/coco_karpathy_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/coco_karpathy_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/dist_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/dist_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/dist_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/dist_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/grounding_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/grounding_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/grounding_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/grounding_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/nlvr_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/nlvr_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/nlvr_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/nlvr_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/pretrain_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/pretrain_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/pretrain_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/pretrain_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/randaugment.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/randaugment.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/randaugment.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/randaugment.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/re_bbox_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/re_bbox_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/re_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/re_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/re_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/re_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/vqa_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/vqa_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /Method/dataset/__pycache__/vqa_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/dataset/__pycache__/vqa_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /Method/dataset/coco_karpathy_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import random 4 | from collections import Counter 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from torchvision.datasets.utils import download_url 9 | 10 | from PIL import Image 11 | 12 | from dataset.utils import pre_caption 13 | 14 | 15 | class coco_karpathy_train(Dataset): 16 | def __init__(self, transform, image_root, ann_rpath, max_words=30, prompt=''): 17 | self.annotation = [] 18 | for f in ann_rpath: 19 | self.annotation += json.load(open(f, 'r')) 20 | 21 | self.transform = transform 22 | self.image_root = image_root 23 | self.max_words = max_words 24 | self.prompt = prompt 25 | 26 | self.img_ids = {} 27 | n = 0 28 | for ann in self.annotation: 29 | img_id = ann['image_id'] 30 | if img_id not in self.img_ids.keys(): 31 | self.img_ids[img_id] = n 32 | n += 1 33 | 34 | def __len__(self): 35 | return len(self.annotation) 36 | 37 | def __getitem__(self, index): 38 | 39 | ann = self.annotation[index] 40 | 41 | image_path = os.path.join(self.image_root, ann['image']) 42 | image = Image.open(image_path).convert('RGB') 43 | image = self.transform(image) 44 | 45 | caption = self.prompt + pre_caption(ann['caption'], self.max_words) 46 | 47 | return image, caption, self.img_ids[ann['image_id']] 48 | 49 | 50 | class coco_karpathy_train_scst(Dataset): 51 | def __init__(self, 
transform, image_root, ann_rpath, max_words=30, prompt=''): 52 | self.annotation = [] 53 | self.image_captions_map = {} 54 | 55 | for f in ann_rpath: 56 | for ann in json.load(open(f, 'r')): 57 | self.annotation.append(ann) 58 | 59 | if ann['image'] in self.image_captions_map.keys(): 60 | self.image_captions_map[ann['image']].append(ann['caption']) 61 | else: 62 | self.image_captions_map[ann['image']] = [ann['caption']] 63 | 64 | counter = Counter() 65 | for _, v in self.image_captions_map.items(): 66 | counter[len(v)] += 1 67 | print("### image_captions_map, ", counter, flush=True) 68 | 69 | self.transform = transform 70 | self.image_root = image_root 71 | self.max_words = max_words 72 | self.prompt = prompt 73 | 74 | self.img_ids = {} 75 | n = 0 76 | for ann in self.annotation: 77 | img_id = ann['image_id'] 78 | if img_id not in self.img_ids.keys(): 79 | self.img_ids[img_id] = n 80 | n += 1 81 | 82 | def __len__(self): 83 | return len(self.annotation) 84 | 85 | def __getitem__(self, index): 86 | ann = self.annotation[index] 87 | 88 | image_path = os.path.join(self.image_root, ann['image']) 89 | image = Image.open(image_path).convert('RGB') 90 | image = self.transform(image) 91 | 92 | # w/o prompt 93 | captions_gt = [pre_caption(c, self.max_words) for c in self.image_captions_map[ann['image']]] 94 | 95 | return image, random.sample(captions_gt, 5) 96 | 97 | def collate_fn(self, batch_sample): 98 | batch = [] 99 | for x in zip(*batch_sample): 100 | batch.append(x) 101 | 102 | image_list, captions_gt_list = batch 103 | 104 | images = torch.stack(image_list) 105 | 106 | return images, captions_gt_list 107 | 108 | 109 | class coco_karpathy_caption_eval(Dataset): 110 | def __init__(self, transform, image_root, ann_rpath, split): 111 | self.annotation = json.load(open(ann_rpath, 'r')) 112 | self.transform = transform 113 | self.image_root = image_root 114 | 115 | def __len__(self): 116 | return len(self.annotation) 117 | 118 | def __getitem__(self, index): 119 | 120 | ann = self.annotation[index] 121 | 122 | image_path = os.path.join(self.image_root, ann['image']) 123 | image = Image.open(image_path).convert('RGB') 124 | image = self.transform(image) 125 | 126 | img_id = ann['image'].split('/')[-1].strip('.jpg').split('_')[-1] 127 | 128 | return image, int(img_id) 129 | 130 | -------------------------------------------------------------------------------- /Method/dataset/dist_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276) 4 | # Github: https://github.com/zengyan-97/X-VLM 5 | # Copyright (c) 2022, ByteDance Inc. 6 | # All rights reserved. 7 | 8 | import sys 9 | from typing import List, Any 10 | import warnings 11 | import random 12 | from itertools import cycle 13 | import torch 14 | from torch.utils.data import IterableDataset 15 | 16 | from utils.hdfs_io import hopen, hlist_files 17 | 18 | 19 | class DistLineReadingDataset(IterableDataset): # pylint: disable=W0223 20 | """ 21 | iterate a set of folders. 
22 | """ 23 | def __init__(self, 24 | data_path: str, 25 | rank: int = 0, 26 | world_size: int = 1, 27 | shuffle: bool = False, 28 | repeat: bool = False): 29 | super().__init__() 30 | self.shuffle = shuffle 31 | self.rank = rank 32 | self.world_size = world_size 33 | 34 | self.files = hlist_files(data_path.split(',')) 35 | self.files = [f for f in self.files if f.find('_SUCCESS') < 0] 36 | self.is_hdfs = data_path.startswith('hdfs') 37 | 38 | self.repeat = repeat 39 | print('[DATA]--all dataset containing {} files.'.format(len(self.files))) 40 | if len(self.files) % self.world_size != 0: 41 | print('[DATA]--Whole dataset file num %s cannot split to worldsize %s ' % 42 | (len(self.files), self.world_size)) 43 | sys.stdout.flush() 44 | 45 | def generate(self): 46 | if self.world_size == 1 or len(self.files) == 1: 47 | cur_dataloader_files = self.files 48 | else: 49 | cur_dataloader_files = split_shard( 50 | self.files, self.rank, self.world_size) 51 | 52 | while True: 53 | if self.shuffle: 54 | random.shuffle(cur_dataloader_files) 55 | worker_info = torch.utils.data.get_worker_info() 56 | 57 | if worker_info is not None: 58 | if len(cur_dataloader_files) % worker_info.num_workers != 0: 59 | print('[DATA]--current dataloader %s file num %s cannot split to worker_num %s ' % 60 | (self.rank, len(cur_dataloader_files), worker_info.num_workers)) 61 | cur_worker_files = split_shard( 62 | cur_dataloader_files, worker_info.id, worker_info.num_workers) 63 | if worker_info.id == 0: 64 | print("[DataLoader] --> Rank:{} Workers:[{} ~ {}][{}] Size of process file:{} ...".format( 65 | self.rank, 0, worker_info.num_workers - 1, worker_info.id, len(cur_dataloader_files))) 66 | else: 67 | cur_worker_files = cur_dataloader_files 68 | 69 | if self.shuffle: 70 | random.shuffle(cur_worker_files) 71 | for filepath in cur_worker_files: 72 | if self.is_hdfs: 73 | with hopen(filepath, 'r') as reader: 74 | for line in reader: 75 | yield line.decode() 76 | continue 77 | with open(filepath, 'r') as reader: 78 | for line in reader: 79 | yield line 80 | 81 | if not self.repeat: 82 | break 83 | 84 | def __iter__(self): 85 | return self.generate() 86 | 87 | 88 | def split_shard(data: List[Any], shard_idx: int, shard_size: int): 89 | num = len(data) 90 | if num < shard_size: 91 | raise RuntimeError("num:{} < shard size:{}".format(num, shard_size)) 92 | start_idx = (num * shard_idx) // shard_size 93 | end_idx = (num * (shard_idx + 1)) // shard_size 94 | return data[start_idx: end_idx] 95 | -------------------------------------------------------------------------------- /Method/dataset/re_bbox_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import math 4 | import random 5 | from random import random as rand 6 | import torchvision.transforms as transforms 7 | import torch 8 | 9 | from torchvision.transforms.functional import hflip, resize 10 | 11 | from dataset.utils import pre_caption 12 | 13 | 14 | from torch.utils.data import Dataset 15 | 16 | from PIL import Image 17 | from PIL import ImageFile 18 | 19 | ImageFile.LOAD_TRUNCATED_IMAGES = True 20 | Image.MAX_IMAGE_PIXELS = None 21 | 22 | 23 | 24 | class re_dataset_bbox(Dataset): 25 | def __init__(self, ann_file, transform, image_root, max_words=30, mode='train', config=None): 26 | self.image_res = config['image_res'] 27 | 28 | self.ann = [] 29 | for f in ann_file: 30 | self.ann += json.load(open(f, 'r')) 31 | self.transform = transform 32 | self.image_root = image_root 33 | self.max_words = 
max_words 34 | self.mode = mode 35 | self.img_ids = {} 36 | 37 | n = 0 38 | for ann in self.ann: 39 | img_id = ann['image_id'] 40 | if img_id not in self.img_ids.keys(): 41 | self.img_ids[img_id] = n 42 | n += 1 43 | 44 | def __len__(self): 45 | return len(self.ann) 46 | 47 | def __getitem__(self, index): 48 | # print('Note: This part is in the dataset building process') 49 | 50 | ann = self.ann[index] 51 | caption = pre_caption(ann['caption'], self.max_words) 52 | # print("Here is the caption",caption) 53 | image_path = os.path.join(self.image_root, ann['image']) 54 | image = Image.open(image_path).convert('RGB') 55 | # print("Here is the original image", image) 56 | W, H = image.size 57 | 58 | # random crop 59 | target_bboxes = [] 60 | sens = [] 61 | for sen in ann["sentences"]: 62 | if sen is None: 63 | sen = 'NONE' 64 | else: 65 | sen = pre_caption(sen, self.max_words) 66 | sens.append(sen) 67 | # print("Here are the sens,",sens) 68 | no_bbox_value = -100 69 | no_bbox_tensor = [no_bbox_value, no_bbox_value, no_bbox_value, no_bbox_value] 70 | 71 | for box in ann["bboxes"]: 72 | if box is None: 73 | target_bboxes.append(no_bbox_tensor) 74 | else: 75 | 76 | target_bboxes.append(box) 77 | 78 | image = resize(image, [self.image_res, self.image_res], interpolation=Image.BICUBIC) 79 | image = self.transform(image) 80 | 81 | target_bboxes = torch.tensor(target_bboxes, dtype=torch.float32) 82 | 83 | return image, caption, self.img_ids[ann['image_id']], sens, target_bboxes 84 | 85 | 86 | 87 | 88 | class re_eval_dataset(Dataset): 89 | def __init__(self, ann_file, transform, image_root, max_words=50): 90 | self.ann = json.load(open(ann_file, 'r')) 91 | self.transform = transform 92 | self.image_root = image_root 93 | self.max_words = max_words 94 | 95 | self.text = [] 96 | self.image = [] 97 | self.txt2img = {} 98 | self.img2txt = {} 99 | self.img2building = {} 100 | 101 | txt_id = 0 102 | building_id = 0 103 | ann_building = 0 104 | for img_id, ann in enumerate(self.ann): 105 | ann["building_id"] = ann["image_id"][:4] 106 | if ann_building == 0: 107 | ann_building = ann["building_id"] 108 | self.image.append(ann['image']) 109 | self.img2txt[img_id] = [] 110 | self.img2building[img_id] = building_id 111 | if ann_building != ann["building_id"]: 112 | ann_building = ann["building_id"] 113 | building_id += 1 114 | for i, caption in enumerate(ann['caption']): 115 | self.text.append(pre_caption(caption, self.max_words)) 116 | self.img2txt[img_id].append(txt_id) 117 | self.txt2img[txt_id] = img_id 118 | txt_id += 1 119 | 120 | def __len__(self): 121 | return len(self.image) 122 | 123 | def __getitem__(self, index): 124 | 125 | image_path = os.path.join(self.image_root, self.ann[index]['image']) 126 | image = Image.open(image_path).convert('RGB') 127 | image = self.transform(image) 128 | 129 | return image, index 130 | -------------------------------------------------------------------------------- /Method/models/__init__.py: -------------------------------------------------------------------------------- 1 | from models.xvlm import XVLMBase 2 | from models.xvlm import build_mlp 3 | from models.xvlm import load_pretrained -------------------------------------------------------------------------------- /Method/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/__init__.cpython-37.pyc 
-------------------------------------------------------------------------------- /Method/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/box_ops.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/box_ops.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/box_ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/box_ops.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/clip_vit.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/clip_vit.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/clip_vit.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/clip_vit.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_bbox.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_bbox.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_pretrain.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_pretrain.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_re_bbox.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_re_bbox.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_retrieval.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_retrieval.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_retrieval.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_retrieval.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/model_vqa.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/model_vqa.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/swin_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/swin_transformer.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/swin_transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/swin_transformer.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/tokenization_bert.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/tokenization_bert.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/tokenization_bert.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/tokenization_bert.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/tokenization_roberta.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/tokenization_roberta.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/tokenization_roberta.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/tokenization_roberta.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/vit.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/vit.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/vit.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/vit.cpython-38.pyc 
-------------------------------------------------------------------------------- /Method/models/__pycache__/xbert.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xbert.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xbert.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xbert.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xroberta.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xroberta.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xroberta.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xroberta.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xvlm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xvlm.cpython-37.pyc -------------------------------------------------------------------------------- /Method/models/__pycache__/xvlm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/models/__pycache__/xvlm.cpython-38.pyc -------------------------------------------------------------------------------- /Method/models/box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Utilities for bounding box manipulation and GIoU. 
4 | """
5 | import math
6 | import torch
7 | from torchvision.ops.boxes import box_area
8 | 
9 | def box_cxcywh_to_xyxy(x):  # this one is used
10 |     x_c, y_c, w, h = x.unbind(-1)
11 |     b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
12 |          (x_c + 0.5 * w), (y_c + 0.5 * h)]
13 |     return torch.stack(b, dim=-1)
14 | 
15 | 
16 | def box_xyxy_to_cxcywh(x):
17 |     x0, y0, x1, y1 = x.unbind(-1)
18 |     b = [(x0 + x1) / 2, (y0 + y1) / 2,
19 |          (x1 - x0), (y1 - y0)]
20 |     return torch.stack(b, dim=-1)
21 | 
22 | 
23 | # modified from torchvision to also return the union
24 | def box_iou(boxes1, boxes2):
25 |     area1 = box_area(boxes1)
26 |     area2 = box_area(boxes2)
27 | 
28 |     lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
29 |     rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]
30 | 
31 |     wh = (rb - lt).clamp(min=0)  # [N,M,2]
32 |     inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
33 | 
34 |     union = area1[:, None] + area2 - inter
35 | 
36 |     iou = inter / union
37 |     return iou, union
38 | 
39 | 
40 | def generalized_box_iou(boxes1, boxes2):
41 |     """
42 |     Generalized IoU from https://giou.stanford.edu/
43 | 
44 |     The boxes should be in [x0, y0, x1, y1] format
45 | 
46 |     Returns a [N, M] pairwise matrix, where N = len(boxes1)
47 |     and M = len(boxes2)
48 |     """
49 |     iou, union = box_iou(boxes1, boxes2)
50 | 
51 |     lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
52 |     rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
53 | 
54 |     wh = (rb - lt).clamp(min=0)  # [N,M,2]
55 |     area = wh[:, :, 0] * wh[:, :, 1]
56 | 
57 |     return iou - (area - union) / area
58 | 
59 | 
60 | def bbox_iou(box1, box2, x1y1x2y2=True):
61 |     if not x1y1x2y2:
62 |         box1 = torch.cat((box1[..., :2] - box1[..., 2:] / 2, box1[..., :2] + box1[..., 2:] / 2), dim=-1)
63 |         box2 = torch.cat((box2[..., :2] - box2[..., 2:] / 2, box2[..., :2] + box2[..., 2:] / 2), dim=-1)
64 | 
65 |     inter = torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])  # pairwise overlap extents
66 |     inter = torch.clamp(inter, min=0)
67 |     inter_area = inter[..., 0] * inter[..., 1]
68 | 
69 |     area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
70 |     area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
71 |     union_area = area1[:, None] + area2 - inter_area
72 | 
73 |     return inter_area / union_area
74 | 
75 | def bbox_giou(box1, box2, x1y1x2y2=True):
76 |     if not x1y1x2y2:
77 |         box1 = torch.cat((box1[..., :2] - box1[..., 2:] / 2, box1[..., :2] + box1[..., 2:] / 2), dim=-1)
78 |         box2 = torch.cat((box2[..., :2] - box2[..., 2:] / 2, box2[..., :2] + box2[..., 2:] / 2), dim=-1)
79 | 
80 |     # GIoU = IoU - (C - U) / C, with C the area of the smallest enclosing box
81 |     iou, union = box_iou(box1, box2)
82 |     c = (torch.max(box1[:, None, 2:], box2[:, 2:]) - torch.min(box1[:, None, :2], box2[:, :2])).clamp(min=0)
83 |     c_area = c[..., 0] * c[..., 1]
84 | 
85 |     return iou - (c_area - union) / c_area
86 | 
87 | def bbox_diou(box1, box2, x1y1x2y2=True):
88 |     iou = bbox_iou(box1, box2, x1y1x2y2)
89 | 
90 |     if not x1y1x2y2:
91 |         box1 = torch.cat((box1[..., :2] - box1[..., 2:] / 2, box1[..., :2] + box1[..., 2:] / 2), dim=-1)
92 |         box2 = torch.cat((box2[..., :2] - box2[..., 2:] / 2, box2[..., :2] + box2[..., 2:] / 2), dim=-1)
93 | 
94 |     center1 = (box1[:, :2] + box1[:, 2:]) / 2
95 |     center2 = (box2[:, :2] + box2[:, 2:]) / 2
96 |     inter_diag = torch.sum((center2[None, :, :] - center1[:, None, :]) ** 2, dim=-1)  # pairwise squared center distance, [N, M]
97 | 
98 |     c = torch.max(box1[:, None, 2:], box2[:, 2:]) - torch.min(box1[:, None, :2], box2[:, :2])
99 |     c_diag = torch.sum(c ** 2, dim=-1)
100 | 
101 |     return iou - inter_diag / c_diag
102 | 
103 | 
104 | def bbox_ciou(box1, box2, x1y1x2y2=True):
105 |     # first compute the DIoU
106 |     diou = bbox_diou(box1, box2, x1y1x2y2)
107 | 
108 |     if x1y1x2y2:
109 |         # convert to [cx, cy, w, h] format
110 |         box1 = torch.cat(((box1[..., 2:] + box1[..., :2]) / 2, box1[..., 2:] - box1[..., :2]), dim=-1)
111 |         box2 = torch.cat(((box2[..., 2:] + box2[..., :2]) / 2, box2[..., 2:] - box2[..., :2]), dim=-1)
112 | 
113 |     w1, h1 = box1[:, 2], box1[:, 3]
114 |     w2, h2 = box2[:, 2], box2[:, 3]
115 | 
116 |     # consistency of the aspect ratios, computed pairwise -> [N, M]
117 |     v = (4 / (math.pi ** 2)) * torch.pow(torch.atan(w1 / h1)[:, None] - torch.atan(w2 / h2)[None, :], 2)
118 | 
119 |     # alpha trade-off term; avoids over-penalizing the aspect-ratio term when the overlap is zero
120 |     with torch.no_grad():
121 |         alpha = v / (1 - diou + v)
122 | 
123 |     # the final CIoU combines DIoU with the aspect-ratio consistency term
124 |     ciou = diou - (alpha * v)
125 |     return ciou
--------------------------------------------------------------------------------
/Method/optim.py:
--------------------------------------------------------------------------------
1 | from transformers.optimization import AdamW
2 | 
3 | 
4 | def create_optimizer(args, model):
5 |     lr = args.lr
6 |     wd = args.weight_decay
7 |     lr_mult = getattr(args, 'lr_mult', 1)
8 |     print("### lr_mult, ", lr_mult)
9 | 
10 |     optimizer_grouped_parameters = [
11 |         {"params": [], "weight_decay": wd, "lr": lr},
12 |         {"params": [], "weight_decay": 0.0, "lr": lr},
13 |         {"params": [], "weight_decay": wd, "lr": lr * lr_mult},
14 |         {"params": [], "weight_decay": 0.0, "lr": lr * lr_mult}
15 |     ]
16 | 
17 |     no_decay = {"bias",
18 |                 "LayerNorm.bias",
19 |                 "LayerNorm.weight",
20 |                 "norm.bias",
21 |                 "norm.weight",
22 |                 "norm1.bias",
23 |                 "norm1.weight",
24 |                 "norm2.bias",
25 |                 "norm2.weight"}
26 | 
27 |     if hasattr(model, 'init_params'):
28 |         large_lr = model.init_params
29 |         print("### model has 'init_params', ", len(large_lr))
30 |     else:
31 |         large_lr = {}
32 | 
33 |     for n, p in model.named_parameters():
34 |         if not p.requires_grad:
35 |             continue  # frozen weights
36 | 
37 |         if any(nd in n for nd in no_decay):
38 |             if n in large_lr:
39 |                 optimizer_grouped_parameters[3]['params'].append(p)
40 |             else:
41 |                 optimizer_grouped_parameters[1]['params'].append(p)
42 |         else:  # decay
43 |             if n in large_lr:
44 |                 optimizer_grouped_parameters[2]['params'].append(p)
45 |             else:
46 |                 optimizer_grouped_parameters[0]['params'].append(p)
47 | 
48 |     optimizer = AdamW(optimizer_grouped_parameters, lr=lr, eps=1e-8, betas=(0.9, 0.98))
49 | 
50 |     return optimizer
51 | 
--------------------------------------------------------------------------------
/Method/output/all_output_eva/config.yaml:
--------------------------------------------------------------------------------
1 | train_file: [/root/GeoText-1652/GeoText1652_Dataset/train.json]
2 | test_file: /root/GeoText-1652/GeoText1652_Dataset/test_951_version.json
3 | image_root: /root/GeoText-1652/GeoText1652_Dataset/images
4 | 
5 | ## Vision Encoder
6 | vision_config: configs/config_swinB_384.json
7 | 
8 | use_clip_vit: false
9 | #image_res: 384
10 | #patch_size: 16
11 | 
12 | use_swin: true
13 | image_res: 384
14 | patch_size: 32
15 | 
16 | 
17 | ## Text Encoder
18 | use_roberta: false
19 | text_config: configs/config_bert.json # ['configs/config_bert.json', 'configs/config_roberta.json']
20 | text_encoder: /root/GeoText-1652/GeoText1652_model/bert # ['data/bert-base-uncased', 'data/roberta-base']
21 | 
22 | 
23 | 
24 | ## Training
25 | batch_size_train: 24
26 | batch_size_test: 1
27 | batch_size_test_text: 512
28 | max_tokens: 50
29 | embed_dim: 256
30 | temp: 0.07
31 | k_test: 256
32 | 
33 | 
34 | ## Other Settings
35 | # optimizer: {opt: adamW, lr: 3e-6, weight_decay: 0.001, lr_mult: 2}
36 | # schedular: {sched: linear, lr: 3e-6, epochs: 3, num_warmup_steps: 0.1}
37 | 
38 | 
39 | optimizer: {opt: adamW, lr: 3e-5,
weight_decay: 0.01, lr_mult: 2} 40 | schedular: {sched: linear, lr: 3e-5, epochs: 1, num_warmup_steps: 0.1} 41 | -------------------------------------------------------------------------------- /Method/requirements.txt: -------------------------------------------------------------------------------- 1 | torch -f https://download.pytorch.org/whl/cu118 2 | torchvision -f https://download.pytorch.org/whl/cu118 3 | torchaudio -f https://download.pytorch.org/whl/cu118 4 | 5 | 6 | 7 | 8 | timm==0.4.9 9 | transformers==4.12.5 10 | ruamel_yaml 11 | opencv-python-headless 12 | scikit-image 13 | matplotlib 14 | chardet 15 | charset_normalizer 16 | PyOpenGL 17 | 18 | 19 | 20 | pycocotools 21 | pycocoevalcap 22 | 23 | -------------------------------------------------------------------------------- /Method/scheduler.py: -------------------------------------------------------------------------------- 1 | from torch.optim.lr_scheduler import LambdaLR 2 | 3 | 4 | def create_scheduler(args, optimizer): 5 | if 'num_training_steps' not in args: 6 | args['num_training_steps'] = args['epochs'] * args['step_per_epoch'] 7 | print("### num_training_steps, ", args['num_training_steps'], flush=True) 8 | 9 | if isinstance(args['num_warmup_steps'], float): 10 | assert 0 <= args['num_warmup_steps'] < 1 11 | args['num_warmup_steps'] = int(args['num_training_steps'] * args['num_warmup_steps']) 12 | print("### num_warmup_steps, ", args['num_warmup_steps'], flush=True) 13 | 14 | if args.sched == 'linear': 15 | def lr_lambda(current_step: int): 16 | if current_step < args.num_warmup_steps: 17 | return float(current_step) / float(max(1, args.num_warmup_steps)) 18 | return max( 19 | 0.0, float(args.num_training_steps - current_step) / float( 20 | max(1, args.num_training_steps - args.num_warmup_steps)) 21 | ) 22 | 23 | lr_scheduler = LambdaLR(optimizer, lr_lambda, last_epoch=-1) 24 | 25 | else: 26 | raise NotImplementedError(f"args.sched == {args.sched}") 27 | 28 | return lr_scheduler 29 | -------------------------------------------------------------------------------- /Method/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/checkpointer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/checkpointer.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/checkpointer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/checkpointer.cpython-38.pyc -------------------------------------------------------------------------------- 
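optim.py (shown earlier) groups parameters four ways -- weight-decay vs. no-decay, each at the base lr or at lr * lr_mult for freshly initialized init_params -- and scheduler.py (just above) turns a fractional num_warmup_steps into an absolute step count before building a linear warmup/decay LambdaLR. A hypothetical wiring of the two, not from the repo: the AttrDict shim, the toy model, and all numbers are illustrative; it assumes it is run from Method/ so optim and scheduler import, and in real training step_per_epoch comes from the dataloader length.

import torch

from optim import create_optimizer
from scheduler import create_scheduler

class AttrDict(dict):
    # both modules read the config as args.lr and as args['lr']
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

model = torch.nn.Linear(8, 2)   # stand-in for the real XVLMBase model
opt_args = AttrDict(lr=3e-5, weight_decay=0.01, lr_mult=2)
sched_args = AttrDict(sched='linear', epochs=1, num_warmup_steps=0.1, step_per_epoch=1000)

optimizer = create_optimizer(opt_args, model)           # 4 param groups: decay/no-decay x lr/lr*lr_mult
lr_scheduler = create_scheduler(sched_args, optimizer)  # warmup becomes int(1000 * 1 * 0.1) = 100 steps

for _ in range(3):                                      # one scheduler step per optimizer step
    optimizer.step()
    lr_scheduler.step()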
/Method/utils/__pycache__/hdfs_io.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/hdfs_io.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/hdfs_io.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/hdfs_io.cpython-38.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/torch_io.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/torch_io.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/__pycache__/torch_io.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/__pycache__/torch_io.cpython-38.pyc -------------------------------------------------------------------------------- /Method/utils/checkpointer.py: -------------------------------------------------------------------------------- 1 | # Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276) 2 | # Github: https://github.com/zengyan-97/X-VLM 3 | # Copyright (c) 2022, ByteDance Inc. 4 | # All rights reserved. 
5 | 6 | from typing import Union, Dict, List, Tuple, Any, Callable 7 | import logging 8 | import os 9 | import re 10 | import time 11 | 12 | import torch 13 | 14 | from utils.hdfs_io import hexists, hmkdir, hcopy 15 | from utils.torch_io import save as hdfs_torch_save 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class Checkpointer: 20 | def __init__(self, 21 | serialization_dir: str = ".output") -> None: 22 | self._serialization_dir = serialization_dir 23 | if not hexists(self._serialization_dir): 24 | hmkdir(self._serialization_dir) 25 | 26 | def save_checkpoint(self, 27 | epoch: Union[int, str], 28 | model_state: Dict[str, Any], 29 | training_states: Dict[str, Any], 30 | step: int = -1) -> None: 31 | """ 32 | Save ckpt to local or HDFS 33 | """ 34 | if step > 0: 35 | model_path = os.path.join( 36 | self._serialization_dir, "model_state_step_{}.th".format(step)) 37 | hdfs_torch_save(model_state, model_path) 38 | 39 | else: 40 | model_path = os.path.join( 41 | self._serialization_dir, "model_state_epoch_{}.th".format(epoch)) 42 | 43 | training_path = os.path.join(self._serialization_dir, 44 | "training_state_latest.th") 45 | hdfs_torch_save(model_state, model_path) 46 | hdfs_torch_save({**training_states, "epoch": epoch}, training_path) 47 | -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/cider/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /Method/utils/cider/pyciderevalcap/cider/cider.py: -------------------------------------------------------------------------------- 1 | # Filename: cider.py 2 | # 3 | # 4 | # Description: Describes the class to compute the CIDEr 5 | # (Consensus-Based Image Description Evaluation) Metric 6 | # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726) 7 | # 8 | # Creation Date: Sun Feb 8 14:16:54 2015 9 | # 10 | # Authors: Ramakrishna Vedantam and 11 | # Tsung-Yi Lin 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | from .cider_scorer import CiderScorer 17 | 18 | 19 | class Cider: 20 | """ 21 | Main Class to compute the CIDEr metric 22 | 23 | """ 24 | def __init__(self, n=4, df="corpus"): 25 | """ 26 | Initialize the CIDEr scoring function 27 | : param n (int): n-gram size 28 | : param df (string): 
/Method/utils/cider/pyciderevalcap/__init__.py:
--------------------------------------------------------------------------------
__author__ = 'tylin'
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/cider/__init__.py:
--------------------------------------------------------------------------------
__author__ = 'tylin'
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/cider/cider.py:
--------------------------------------------------------------------------------
# Filename: cider.py
#
# Description: Defines the class that computes the CIDEr
#              (Consensus-Based Image Description Evaluation) metric
#              by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
#
# Creation Date: Sun Feb 8 14:16:54 2015
#
# Authors: Ramakrishna Vedantam and
#          Tsung-Yi Lin
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from .cider_scorer import CiderScorer


class Cider:
    """
    Main class to compute the CIDEr metric.
    """
    def __init__(self, n=4, df="corpus"):
        """
        Initialize the CIDEr scoring function.

        :param n (int): maximum n-gram size; scores are summed over 1- to n-grams
        :param df (str): where the IDF values come from; one of 'corpus', 'coco-train'
        :return: None
        """
        self._n = n
        self._df = df
        self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)

    def compute_score(self, gts, res):
        """
        Compute the CIDEr score for a set of candidate captions.

        :param gts (dict): maps image_id to a list of tokenized reference sentences
        :param res (list): dicts with keys 'image_id' and 'caption', where
                           'caption' is a single-element list holding the
                           tokenized candidate sentence
        :return: (score, scores): the corpus-level CIDEr score (float) and the
                 per-image scores
        """
        # clear all previous hypotheses and references
        self.cider_scorer.clear()

        for res_id in res:
            hypo = res_id['caption']
            ref = gts[res_id['image_id']]

            # sanity check: exactly one candidate, at least one reference
            assert(type(hypo) is list)
            assert(len(hypo) == 1)
            assert(type(ref) is list)
            assert(len(ref) > 0)
            self.cider_scorer += (hypo[0], ref)

        (score, scores) = self.cider_scorer.compute_score()

        return score, scores

    def method(self):
        return "CIDEr"
--------------------------------------------------------------------------------
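A short, hedged example of driving Cider (the import path and the tokenized captions are made up for illustration):

    from pyciderevalcap.cider.cider import Cider

    # references: image_id -> list of tokenized reference sentences
    gts = {"img_1": ["a man rides a red bike", "a person on a bicycle"]}
    # candidates: one single-element caption list per image
    res = [{"image_id": "img_1", "caption": ["a man riding a red bike"]}]

    scorer = Cider(n=4, df="corpus")
    score, scores = scorer.compute_score(gts, res)
    print(scorer.method(), score)   # corpus-level CIDEr plus per-image scores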
/Method/utils/cider/pyciderevalcap/ciderD/__init__.py:
--------------------------------------------------------------------------------
__author__ = 'tylin'
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD.cpython-37.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD.cpython-38.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD_scorer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD_scorer.cpython-37.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD_scorer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalGeo/GeoText-1652/288cde776213cba91e87e2f35d07ebce81d04875/Method/utils/cider/pyciderevalcap/ciderD/__pycache__/ciderD_scorer.cpython-38.pyc
--------------------------------------------------------------------------------
/Method/utils/cider/pyciderevalcap/ciderD/ciderD.py:
--------------------------------------------------------------------------------
# Filename: ciderD.py
#
# Description: Defines the class that computes the CIDEr-D
#              (Consensus-Based Image Description Evaluation) metric
#              by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
#
# Creation Date: Sun Feb 8 14:16:54 2015
#
# Authors: Ramakrishna Vedantam and Tsung-Yi Lin
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from .ciderD_scorer import CiderScorer


class CiderD:
    """
    Main class to compute the CIDEr-D metric.
    """
    def __init__(self, n=4, sigma=6.0, df="corpus"):
        # sum over 1- to n-grams
        self._n = n
        # standard deviation parameter for the gaussian length penalty
        self._sigma = sigma
        # where to compute document frequencies from
        self._df = df
        self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)

    def compute_score(self, gts, res):
        """
        Compute the CIDEr-D score for a set of candidate captions.

        :param gts (dict): maps image_id to a list of tokenized reference sentences
        :param res (list): dicts with keys 'image_id' and 'caption', where
                           'caption' is a single-element list holding the
                           tokenized candidate sentence
        :return: (score, scores): the corpus-level CIDEr-D score (float) and the
                 per-image scores
        """
        # score into a fresh scorer so repeated calls do not accumulate state
        tmp_cider_scorer = self.cider_scorer.copy_empty()
        tmp_cider_scorer.clear()

        for res_id in res:
            hypo = res_id['caption']
            ref = gts[res_id['image_id']]

            # sanity check: exactly one candidate, at least one reference
            assert(type(hypo) is list)
            assert(len(hypo) == 1)
            assert(type(ref) is list)
            assert(len(ref) > 0)
            tmp_cider_scorer += (hypo[0], ref)

        (score, scores) = tmp_cider_scorer.compute_score()

        return score, scores

    def method(self):
        return "CIDEr-D"
--------------------------------------------------------------------------------
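Unlike Cider, which scores through the shared self.cider_scorer, CiderD.compute_score works on a fresh copy_empty() scorer, so one instance can be reused across evaluations without state leaking between calls. A sketch, reusing the made-up gts/res from the previous example:

    from pyciderevalcap.ciderD.ciderD import CiderD

    scorer_d = CiderD(n=4, sigma=6.0, df="corpus")
    score_a, _ = scorer_d.compute_score(gts, res)
    score_b, _ = scorer_d.compute_score(gts, res)   # same inputs, same result
    assert score_a == score_b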
/Method/utils/hdfs_io.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276)
# Github: https://github.com/zengyan-97/X-VLM
# Copyright (c) 2022, ByteDance Inc.
# All rights reserved.

import sys
from typing import IO, Any, List

import shutil
import subprocess
from contextlib import contextmanager
import os

# Point this at your local hadoop installation.
HADOOP_BIN = 'HADOOP_ROOT_LOGGER=ERROR,console /SET/PATH/TO/hadoop/bin/hdfs'

__all__ = ['hlist_files', 'hopen', 'hexists', 'hmkdir', 'hcopy']


@contextmanager  # type: ignore
def hopen(hdfs_path: str, mode: str = "r") -> IO[Any]:
    """
    Open a file on HDFS with a context manager.

    Args:
        mode (str): one of ["r", "w", "a", "wa"]
    """
    pipe = None
    if mode.startswith("r"):
        pipe = subprocess.Popen(
            "{} dfs -text {}".format(HADOOP_BIN, hdfs_path), shell=True, stdout=subprocess.PIPE)
        yield pipe.stdout
        pipe.stdout.close()  # type: ignore
        pipe.wait()
        return
    if mode == "wa" or mode == "a":
        pipe = subprocess.Popen(
            "{} dfs -appendToFile - {}".format(HADOOP_BIN, hdfs_path), shell=True, stdin=subprocess.PIPE)
        yield pipe.stdin
        pipe.stdin.close()  # type: ignore
        pipe.wait()
        return
    if mode.startswith("w"):
        pipe = subprocess.Popen(
            "{} dfs -put -f - {}".format(HADOOP_BIN, hdfs_path), shell=True, stdin=subprocess.PIPE)
        yield pipe.stdin
        pipe.stdin.close()  # type: ignore
        pipe.wait()
        return
    raise RuntimeError("unsupported io mode: {}".format(mode))


def hlist_files(folders: List[str]) -> List[str]:
    """List the files under each folder, transparently handling hdfs:// paths."""
    files = []
    for folder in folders:
        if folder.startswith('hdfs'):
            pipe = subprocess.Popen("{} dfs -ls {}".format(HADOOP_BIN, folder), shell=True,
                                    stdout=subprocess.PIPE)
            for line in pipe.stdout:  # type: ignore
                line = line.strip()
                # a valid entry looks like "drwxr-xr-x - user group 4 <path>";
                # skip header or blank lines with fewer than 5 columns
                if len(line.split()) < 5:
                    continue
                files.append(line.split()[-1].decode("utf8"))
            pipe.stdout.close()  # type: ignore
            pipe.wait()
        else:
            if os.path.isdir(folder):
                files.extend([os.path.join(folder, d) for d in os.listdir(folder)])
            elif os.path.isfile(folder):
                files.append(folder)
            else:
                print('Path {} is invalid'.format(folder))
                sys.stdout.flush()

    return files


def hexists(file_path: str) -> bool:
    """Check whether a path exists, on HDFS or the local filesystem."""
    if file_path.startswith('hdfs'):
        return os.system("{} dfs -test -e {}".format(HADOOP_BIN, file_path)) == 0
    return os.path.exists(file_path)


def hmkdir(file_path: str) -> bool:
    """Create a directory (and any missing parents), on HDFS or locally."""
    if file_path.startswith('hdfs'):
        os.system("{} dfs -mkdir -p {}".format(HADOOP_BIN, file_path))  # exist ok
    else:
        os.makedirs(file_path, exist_ok=True)  # mirror 'dfs -mkdir -p'
    return True


def hcopy(from_path: str, to_path: str) -> bool:
    """Copy a file between any combination of HDFS and local paths."""
    if to_path.startswith("hdfs"):
        if from_path.startswith("hdfs"):
            os.system("{} dfs -cp -f {} {}".format(HADOOP_BIN, from_path, to_path))
        else:
            os.system("{} dfs -copyFromLocal -f {} {}".format(HADOOP_BIN, from_path, to_path))
    else:
        if from_path.startswith("hdfs"):
            os.system("{} dfs -text {} > {}".format(HADOOP_BIN, from_path, to_path))
        else:
            shutil.copy(from_path, to_path)
    return True
--------------------------------------------------------------------------------
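A hedged sketch of the helpers above (the hdfs:// URIs are placeholders, and HADOOP_BIN must first point at a real hdfs binary):

    from utils.hdfs_io import hopen, hlist_files, hexists, hmkdir

    out = "hdfs://cluster/user/me/out"        # placeholder URI
    if not hexists(out):
        hmkdir(out)

    with hopen(out + "/log.txt", "w") as writer:
        writer.write(b"hello\n")              # the underlying pipe expects bytes

    for path in hlist_files([out]):
        print(path)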
/Method/utils/torch_io.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (https://arxiv.org/abs/2111.08276)
# Github: https://github.com/zengyan-97/X-VLM
# Copyright (c) 2022, ByteDance Inc.
# All rights reserved.

import io
import torch

from .hdfs_io import hopen


def load(filepath: str, **kwargs):
    """Load a torch checkpoint from local disk or HDFS."""
    if not filepath.startswith("hdfs://"):
        return torch.load(filepath, **kwargs)
    with hopen(filepath, "rb") as reader:
        accessor = io.BytesIO(reader.read())
        state_dict = torch.load(accessor, **kwargs)
        del accessor
        return state_dict


def save(obj, filepath: str, **kwargs):
    """Save a torch object to local disk or HDFS."""
    if filepath.startswith("hdfs://"):
        with hopen(filepath, "wb") as writer:
            torch.save(obj, writer, **kwargs)
    else:
        torch.save(obj, filepath, **kwargs)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
torch -f https://download.pytorch.org/whl/cu118
torchvision -f https://download.pytorch.org/whl/cu118
torchaudio -f https://download.pytorch.org/whl/cu118

timm==0.4.9
transformers==4.12.5
ruamel_yaml
opencv-python-headless
scikit-image
matplotlib
chardet
charset_normalizer
PyOpenGL

pycocotools
pycocoevalcap
--------------------------------------------------------------------------------
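Closing the loop on torch_io.py above: a hedged round-trip sketch with a made-up local path (an hdfs:// path would stream through hopen instead of hitting the local branch):

    import torch
    from utils.torch_io import save, load

    state = {"w": torch.ones(2, 2)}
    save(state, "/tmp/demo.th")               # local path: plain torch.save
    restored = load("/tmp/demo.th")
    assert torch.equal(restored["w"], state["w"])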